[llvm] fe68fb6 - [AArch64] Make the list of LSE supported operations explicit (#171126)

via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 9 06:22:46 PST 2025


Author: David Green
Date: 2025-12-09T14:22:42Z
New Revision: fe68fb62eabf65995a2abe05c9710fce22b14620

URL: https://github.com/llvm/llvm-project/commit/fe68fb62eabf65995a2abe05c9710fce22b14620
DIFF: https://github.com/llvm/llvm-project/commit/fe68fb62eabf65995a2abe05c9710fce22b14620.diff

LOG: [AArch64] Make the list of LSE supported operations explicit (#171126)

Similar to #167760, this makes the list of LSE atomics explicit in case
new operations are added in the future. UIncWrap, UDecWrap, USubCond and
USubSat are excluded.

Fixes #170450

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/atomic-ops.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d1441a744eee8..e140aabb9bbeb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29759,12 +29759,26 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
                                AI->getOperation() == AtomicRMWInst::FMinimum))
     return AtomicExpansionKind::None;
 
-  // Nand is not supported in LSE.
   // Leave 128 bits to LLSC or CmpXChg.
-  if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128 &&
-      !AI->isFloatingPointOperation()) {
-    if (Subtarget->hasLSE())
-      return AtomicExpansionKind::None;
+  if (Size < 128 && !AI->isFloatingPointOperation()) {
+    if (Subtarget->hasLSE()) {
+      // Nand is not supported in LSE.
+      switch (AI->getOperation()) {
+      case AtomicRMWInst::Xchg:
+      case AtomicRMWInst::Add:
+      case AtomicRMWInst::Sub:
+      case AtomicRMWInst::And:
+      case AtomicRMWInst::Or:
+      case AtomicRMWInst::Xor:
+      case AtomicRMWInst::Max:
+      case AtomicRMWInst::Min:
+      case AtomicRMWInst::UMax:
+      case AtomicRMWInst::UMin:
+        return AtomicExpansionKind::None;
+      default:
+        break;
+      }
+    }
     if (Subtarget->outlineAtomics()) {
       // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
       // Don't outline them unless

diff  --git a/llvm/test/CodeGen/AArch64/atomic-ops.ll b/llvm/test/CodeGen/AArch64/atomic-ops.ll
index 1c2edd39e268d..adfb4ab10108a 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -aarch64-enable-sink-fold=true < %s | FileCheck %s --check-prefixes=CHECK,INLINE_ATOMICS
-; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+outline-atomics -aarch64-enable-sink-fold=true < %s | FileCheck %s --check-prefixes=CHECK,OUTLINE_ATOMICS
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,INLINE_ATOMICS
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -mattr=+outline-atomics < %s | FileCheck %s --check-prefixes=CHECK,OUTLINE_ATOMICS
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -mattr=+lse < %s | FileCheck %s --check-prefixes=CHECK,LSE
 
 @var8 = dso_local global i8 0
 @var16 = dso_local global i16 0
@@ -30,6 +31,13 @@ define dso_local i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd1_acq_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_add_i8:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var8
+; LSE-NEXT:    add x8, x8, :lo12:var8
+; LSE-NEXT:    ldaddalb w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw add ptr @var8, i8 %offset seq_cst
    ret i8 %old
 }
@@ -57,6 +65,13 @@ define dso_local i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd2_acq
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_add_i16:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var16
+; LSE-NEXT:    add x8, x8, :lo12:var16
+; LSE-NEXT:    ldaddah w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw add ptr @var16, i16 %offset acquire
    ret i16 %old
 }
@@ -84,6 +99,13 @@ define dso_local i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd4_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_add_i32:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var32
+; LSE-NEXT:    add x8, x8, :lo12:var32
+; LSE-NEXT:    ldaddl w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw add ptr @var32, i32 %offset release
    ret i32 %old
 }
@@ -111,6 +133,13 @@ define dso_local i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd8_relax
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_add_i64:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var64
+; LSE-NEXT:    add x8, x8, :lo12:var64
+; LSE-NEXT:    ldadd x0, x0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw add ptr @var64, i64 %offset monotonic
    ret i64 %old
 }
@@ -139,6 +168,14 @@ define dso_local i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd1_relax
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_sub_i8:
+; LSE:       // %bb.0:
+; LSE-NEXT:    neg w8, w0
+; LSE-NEXT:    adrp x9, var8
+; LSE-NEXT:    add x9, x9, :lo12:var8
+; LSE-NEXT:    ldaddb w8, w0, [x9]
+; LSE-NEXT:    ret
    %old = atomicrmw sub ptr @var8, i8 %offset monotonic
    ret i8 %old
 }
@@ -167,6 +204,14 @@ define dso_local i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd2_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_sub_i16:
+; LSE:       // %bb.0:
+; LSE-NEXT:    neg w8, w0
+; LSE-NEXT:    adrp x9, var16
+; LSE-NEXT:    add x9, x9, :lo12:var16
+; LSE-NEXT:    ldaddlh w8, w0, [x9]
+; LSE-NEXT:    ret
    %old = atomicrmw sub ptr @var16, i16 %offset release
    ret i16 %old
 }
@@ -195,6 +240,14 @@ define dso_local i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd4_acq
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_sub_i32:
+; LSE:       // %bb.0:
+; LSE-NEXT:    neg w8, w0
+; LSE-NEXT:    adrp x9, var32
+; LSE-NEXT:    add x9, x9, :lo12:var32
+; LSE-NEXT:    ldadda w8, w0, [x9]
+; LSE-NEXT:    ret
    %old = atomicrmw sub ptr @var32, i32 %offset acquire
    ret i32 %old
 }
@@ -223,6 +276,14 @@ define dso_local i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldadd8_acq_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_sub_i64:
+; LSE:       // %bb.0:
+; LSE-NEXT:    neg x8, x0
+; LSE-NEXT:    adrp x9, var64
+; LSE-NEXT:    add x9, x9, :lo12:var64
+; LSE-NEXT:    ldaddal x8, x0, [x9]
+; LSE-NEXT:    ret
    %old = atomicrmw sub ptr @var64, i64 %offset seq_cst
    ret i64 %old
 }
@@ -251,6 +312,14 @@ define dso_local i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldclr1_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_and_i8:
+; LSE:       // %bb.0:
+; LSE-NEXT:    mvn w8, w0
+; LSE-NEXT:    adrp x9, var8
+; LSE-NEXT:    add x9, x9, :lo12:var8
+; LSE-NEXT:    ldclrlb w8, w0, [x9]
+; LSE-NEXT:    ret
    %old = atomicrmw and ptr @var8, i8 %offset release
    ret i8 %old
 }
@@ -279,6 +348,14 @@ define dso_local i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldclr2_relax
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_and_i16:
+; LSE:       // %bb.0:
+; LSE-NEXT:    mvn w8, w0
+; LSE-NEXT:    adrp x9, var16
+; LSE-NEXT:    add x9, x9, :lo12:var16
+; LSE-NEXT:    ldclrh w8, w0, [x9]
+; LSE-NEXT:    ret
    %old = atomicrmw and ptr @var16, i16 %offset monotonic
    ret i16 %old
 }
@@ -307,6 +384,14 @@ define dso_local i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldclr4_acq_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_and_i32:
+; LSE:       // %bb.0:
+; LSE-NEXT:    mvn w8, w0
+; LSE-NEXT:    adrp x9, var32
+; LSE-NEXT:    add x9, x9, :lo12:var32
+; LSE-NEXT:    ldclral w8, w0, [x9]
+; LSE-NEXT:    ret
    %old = atomicrmw and ptr @var32, i32 %offset seq_cst
    ret i32 %old
 }
@@ -335,6 +420,14 @@ define dso_local i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldclr8_acq
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_and_i64:
+; LSE:       // %bb.0:
+; LSE-NEXT:    mvn x8, x0
+; LSE-NEXT:    adrp x9, var64
+; LSE-NEXT:    add x9, x9, :lo12:var64
+; LSE-NEXT:    ldclra x8, x0, [x9]
+; LSE-NEXT:    ret
    %old = atomicrmw and ptr @var64, i64 %offset acquire
    ret i64 %old
 }
@@ -362,6 +455,13 @@ define dso_local i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldset1_acq_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_or_i8:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var8
+; LSE-NEXT:    add x8, x8, :lo12:var8
+; LSE-NEXT:    ldsetalb w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw or ptr @var8, i8 %offset seq_cst
    ret i8 %old
 }
@@ -389,6 +489,13 @@ define dso_local i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldset2_relax
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_or_i16:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var16
+; LSE-NEXT:    add x8, x8, :lo12:var16
+; LSE-NEXT:    ldseth w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw or ptr @var16, i16 %offset monotonic
    ret i16 %old
 }
@@ -416,6 +523,13 @@ define dso_local i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldset4_acq
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_or_i32:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var32
+; LSE-NEXT:    add x8, x8, :lo12:var32
+; LSE-NEXT:    ldseta w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw or ptr @var32, i32 %offset acquire
    ret i32 %old
 }
@@ -443,6 +557,13 @@ define dso_local i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldset8_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_or_i64:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var64
+; LSE-NEXT:    add x8, x8, :lo12:var64
+; LSE-NEXT:    ldsetl x0, x0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw or ptr @var64, i64 %offset release
    ret i64 %old
 }
@@ -470,6 +591,13 @@ define dso_local i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldeor1_acq
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_xor_i8:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var8
+; LSE-NEXT:    add x8, x8, :lo12:var8
+; LSE-NEXT:    ldeorab w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw xor ptr @var8, i8 %offset acquire
    ret i8 %old
 }
@@ -497,6 +625,13 @@ define dso_local i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldeor2_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_xor_i16:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var16
+; LSE-NEXT:    add x8, x8, :lo12:var16
+; LSE-NEXT:    ldeorlh w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw xor ptr @var16, i16 %offset release
    ret i16 %old
 }
@@ -524,6 +659,13 @@ define dso_local i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldeor4_acq_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_xor_i32:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var32
+; LSE-NEXT:    add x8, x8, :lo12:var32
+; LSE-NEXT:    ldeoral w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw xor ptr @var32, i32 %offset seq_cst
    ret i32 %old
 }
@@ -551,6 +693,13 @@ define dso_local i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_ldeor8_relax
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_xor_i64:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var64
+; LSE-NEXT:    add x8, x8, :lo12:var64
+; LSE-NEXT:    ldeor x0, x0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw xor ptr @var64, i64 %offset monotonic
    ret i64 %old
 }
@@ -577,6 +726,13 @@ define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_swp1_relax
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_xchg_i8:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var8
+; LSE-NEXT:    add x8, x8, :lo12:var8
+; LSE-NEXT:    swpb w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw xchg ptr @var8, i8 %offset monotonic
    ret i8 %old
 }
@@ -603,6 +759,13 @@ define dso_local i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_swp2_acq_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_xchg_i16:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var16
+; LSE-NEXT:    add x8, x8, :lo12:var16
+; LSE-NEXT:    swpalh w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw xchg ptr @var16, i16 %offset seq_cst
    ret i16 %old
 }
@@ -630,6 +793,13 @@ define dso_local i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_swp4_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_xchg_i32:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var32
+; LSE-NEXT:    add x8, x8, :lo12:var32
+; LSE-NEXT:    swpl w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw xchg ptr @var32, i32 %offset release
    ret i32 %old
 }
@@ -656,633 +826,1556 @@ define dso_local i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_swp8_acq
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_xchg_i64:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var64
+; LSE-NEXT:    add x8, x8, :lo12:var64
+; LSE-NEXT:    swpa x0, x0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw xchg ptr @var64, i64 %offset acquire
    ret i64 %old
 }
 
 
 define dso_local i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_min_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var8
-; CHECK-NEXT:    add x9, x9, :lo12:var8
-; CHECK-NEXT:  .LBB24_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrb w10, [x9]
-; CHECK-NEXT:    sxtb w8, w10
-; CHECK-NEXT:    cmp w8, w0, sxtb
-; CHECK-NEXT:    csel w10, w10, w0, le
-; CHECK-NEXT:    stxrb w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB24_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_min_i8:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var8
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; INLINE_ATOMICS-NEXT:  .LBB24_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxrb w10, [x9]
+; INLINE_ATOMICS-NEXT:    sxtb w8, w10
+; INLINE_ATOMICS-NEXT:    cmp w8, w0, sxtb
+; INLINE_ATOMICS-NEXT:    csel w10, w10, w0, le
+; INLINE_ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB24_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_min_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:  .LBB24_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxrb w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE_ATOMICS-NEXT:    csel w10, w10, w0, le
+; OUTLINE_ATOMICS-NEXT:    stxrb w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB24_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_min_i8:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var8
+; LSE-NEXT:    add x8, x8, :lo12:var8
+; LSE-NEXT:    ldsminab w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw min ptr @var8, i8 %offset acquire
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_min_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var16
-; CHECK-NEXT:    add x9, x9, :lo12:var16
-; CHECK-NEXT:  .LBB25_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrh w10, [x9]
-; CHECK-NEXT:    sxth w8, w10
-; CHECK-NEXT:    cmp w8, w0, sxth
-; CHECK-NEXT:    csel w10, w10, w0, le
-; CHECK-NEXT:    stlxrh w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB25_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_min_i16:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var16
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; INLINE_ATOMICS-NEXT:  .LBB25_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldxrh w10, [x9]
+; INLINE_ATOMICS-NEXT:    sxth w8, w10
+; INLINE_ATOMICS-NEXT:    cmp w8, w0, sxth
+; INLINE_ATOMICS-NEXT:    csel w10, w10, w0, le
+; INLINE_ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB25_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_min_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:  .LBB25_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxrh w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    sxth w8, w10
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, sxth
+; OUTLINE_ATOMICS-NEXT:    csel w10, w10, w0, le
+; OUTLINE_ATOMICS-NEXT:    stlxrh w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB25_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_min_i16:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var16
+; LSE-NEXT:    add x8, x8, :lo12:var16
+; LSE-NEXT:    ldsminlh w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw min ptr @var16, i16 %offset release
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_min_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB26_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxr w8, [x9]
-; CHECK-NEXT:    cmp w8, w0
-; CHECK-NEXT:    csel w10, w8, w0, le
-; CHECK-NEXT:    stxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB26_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_min_i32:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var32
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; INLINE_ATOMICS-NEXT:  .LBB26_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldxr w8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp w8, w0
+; INLINE_ATOMICS-NEXT:    csel w10, w8, w0, le
+; INLINE_ATOMICS-NEXT:    stxr w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB26_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_min_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB26_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE_ATOMICS-NEXT:    csel w10, w8, w0, le
+; OUTLINE_ATOMICS-NEXT:    stxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB26_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_min_i32:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var32
+; LSE-NEXT:    add x8, x8, :lo12:var32
+; LSE-NEXT:    ldsmin w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw min ptr @var32, i32 %offset monotonic
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_min_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB27_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr x8, [x9]
-; CHECK-NEXT:    cmp x8, x0
-; CHECK-NEXT:    csel x10, x8, x0, le
-; CHECK-NEXT:    stlxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB27_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_min_i64:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var64
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; INLINE_ATOMICS-NEXT:  .LBB27_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp x8, x0
+; INLINE_ATOMICS-NEXT:    csel x10, x8, x0, le
+; INLINE_ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB27_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov x0, x8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_min_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:  .LBB27_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE_ATOMICS-NEXT:    csel x10, x8, x0, le
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB27_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov x0, x8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_min_i64:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var64
+; LSE-NEXT:    add x8, x8, :lo12:var64
+; LSE-NEXT:    ldsminal x0, x0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw min ptr @var64, i64 %offset seq_cst
    ret i64 %old
 }
 
 define dso_local i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_max_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var8
-; CHECK-NEXT:    add x9, x9, :lo12:var8
-; CHECK-NEXT:  .LBB28_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrb w10, [x9]
-; CHECK-NEXT:    sxtb w8, w10
-; CHECK-NEXT:    cmp w8, w0, sxtb
-; CHECK-NEXT:    csel w10, w10, w0, gt
-; CHECK-NEXT:    stlxrb w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB28_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_max_i8:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var8
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; INLINE_ATOMICS-NEXT:  .LBB28_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxrb w10, [x9]
+; INLINE_ATOMICS-NEXT:    sxtb w8, w10
+; INLINE_ATOMICS-NEXT:    cmp w8, w0, sxtb
+; INLINE_ATOMICS-NEXT:    csel w10, w10, w0, gt
+; INLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB28_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_max_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:  .LBB28_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxrb w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    sxtb w8, w10
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, sxtb
+; OUTLINE_ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB28_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_max_i8:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var8
+; LSE-NEXT:    add x8, x8, :lo12:var8
+; LSE-NEXT:    ldsmaxalb w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw max ptr @var8, i8 %offset seq_cst
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_max_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var16
-; CHECK-NEXT:    add x9, x9, :lo12:var16
-; CHECK-NEXT:  .LBB29_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrh w10, [x9]
-; CHECK-NEXT:    sxth w8, w10
-; CHECK-NEXT:    cmp w8, w0, sxth
-; CHECK-NEXT:    csel w10, w10, w0, gt
-; CHECK-NEXT:    stxrh w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB29_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_max_i16:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var16
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; INLINE_ATOMICS-NEXT:  .LBB29_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxrh w10, [x9]
+; INLINE_ATOMICS-NEXT:    sxth w8, w10
+; INLINE_ATOMICS-NEXT:    cmp w8, w0, sxth
+; INLINE_ATOMICS-NEXT:    csel w10, w10, w0, gt
+; INLINE_ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB29_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_max_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:  .LBB29_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxrh w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    sxth w8, w10
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, sxth
+; OUTLINE_ATOMICS-NEXT:    csel w10, w10, w0, gt
+; OUTLINE_ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB29_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_max_i16:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var16
+; LSE-NEXT:    add x8, x8, :lo12:var16
+; LSE-NEXT:    ldsmaxah w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw max ptr @var16, i16 %offset acquire
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_max_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB30_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxr w8, [x9]
-; CHECK-NEXT:    cmp w8, w0
-; CHECK-NEXT:    csel w10, w8, w0, gt
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB30_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_max_i32:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var32
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; INLINE_ATOMICS-NEXT:  .LBB30_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldxr w8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp w8, w0
+; INLINE_ATOMICS-NEXT:    csel w10, w8, w0, gt
+; INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB30_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_max_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB30_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE_ATOMICS-NEXT:    csel w10, w8, w0, gt
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB30_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_max_i32:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var32
+; LSE-NEXT:    add x8, x8, :lo12:var32
+; LSE-NEXT:    ldsmaxl w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw max ptr @var32, i32 %offset release
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_max_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB31_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxr x8, [x9]
-; CHECK-NEXT:    cmp x8, x0
-; CHECK-NEXT:    csel x10, x8, x0, gt
-; CHECK-NEXT:    stxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB31_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_max_i64:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var64
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; INLINE_ATOMICS-NEXT:  .LBB31_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldxr x8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp x8, x0
+; INLINE_ATOMICS-NEXT:    csel x10, x8, x0, gt
+; INLINE_ATOMICS-NEXT:    stxr w11, x10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB31_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov x0, x8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_max_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:  .LBB31_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxr x8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE_ATOMICS-NEXT:    csel x10, x8, x0, gt
+; OUTLINE_ATOMICS-NEXT:    stxr w11, x10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB31_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov x0, x8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_max_i64:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var64
+; LSE-NEXT:    add x8, x8, :lo12:var64
+; LSE-NEXT:    ldsmax x0, x0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw max ptr @var64, i64 %offset monotonic
    ret i64 %old
 }
 
 define dso_local i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umin_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var8
-; CHECK-NEXT:    add x8, x8, :lo12:var8
-; CHECK-NEXT:    and w9, w0, #0xff
-; CHECK-NEXT:  .LBB32_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrb w0, [x8]
-; CHECK-NEXT:    cmp w0, w9
-; CHECK-NEXT:    csel w10, w0, w9, ls
-; CHECK-NEXT:    stxrb w11, w10, [x8]
-; CHECK-NEXT:    cbnz w11, .LBB32_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_umin_i8:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x8, var8
+; INLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; INLINE_ATOMICS-NEXT:    and w9, w0, #0xff
+; INLINE_ATOMICS-NEXT:  .LBB32_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldxrb w0, [x8]
+; INLINE_ATOMICS-NEXT:    cmp w0, w9
+; INLINE_ATOMICS-NEXT:    csel w10, w0, w9, ls
+; INLINE_ATOMICS-NEXT:    stxrb w11, w10, [x8]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB32_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umin_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x8, var8
+; OUTLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:    and w9, w0, #0xff
+; OUTLINE_ATOMICS-NEXT:  .LBB32_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxrb w0, [x8]
+; OUTLINE_ATOMICS-NEXT:    cmp w0, w9
+; OUTLINE_ATOMICS-NEXT:    csel w10, w0, w9, ls
+; OUTLINE_ATOMICS-NEXT:    stxrb w11, w10, [x8]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB32_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_umin_i8:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var8
+; LSE-NEXT:    add x8, x8, :lo12:var8
+; LSE-NEXT:    lduminb w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset monotonic
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umin_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var16
-; CHECK-NEXT:    add x8, x8, :lo12:var16
-; CHECK-NEXT:    and w9, w0, #0xffff
-; CHECK-NEXT:  .LBB33_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrh w0, [x8]
-; CHECK-NEXT:    cmp w0, w9
-; CHECK-NEXT:    csel w10, w0, w9, ls
-; CHECK-NEXT:    stxrh w11, w10, [x8]
-; CHECK-NEXT:    cbnz w11, .LBB33_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_umin_i16:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x8, var16
+; INLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var16
+; INLINE_ATOMICS-NEXT:    and w9, w0, #0xffff
+; INLINE_ATOMICS-NEXT:  .LBB33_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxrh w0, [x8]
+; INLINE_ATOMICS-NEXT:    cmp w0, w9
+; INLINE_ATOMICS-NEXT:    csel w10, w0, w9, ls
+; INLINE_ATOMICS-NEXT:    stxrh w11, w10, [x8]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB33_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umin_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x8, var16
+; OUTLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:    and w9, w0, #0xffff
+; OUTLINE_ATOMICS-NEXT:  .LBB33_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxrh w0, [x8]
+; OUTLINE_ATOMICS-NEXT:    cmp w0, w9
+; OUTLINE_ATOMICS-NEXT:    csel w10, w0, w9, ls
+; OUTLINE_ATOMICS-NEXT:    stxrh w11, w10, [x8]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB33_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_umin_i16:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var16
+; LSE-NEXT:    add x8, x8, :lo12:var16
+; LSE-NEXT:    lduminah w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset acquire
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umin_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB34_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    cmp w8, w0
-; CHECK-NEXT:    csel w10, w8, w0, ls
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB34_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_umin_i32:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var32
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; INLINE_ATOMICS-NEXT:  .LBB34_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp w8, w0
+; INLINE_ATOMICS-NEXT:    csel w10, w8, w0, ls
+; INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB34_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umin_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB34_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE_ATOMICS-NEXT:    csel w10, w8, w0, ls
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB34_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_umin_i32:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var32
+; LSE-NEXT:    add x8, x8, :lo12:var32
+; LSE-NEXT:    lduminal w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw umin ptr @var32, i32 %offset seq_cst
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umin_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB35_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr x8, [x9]
-; CHECK-NEXT:    cmp x8, x0
-; CHECK-NEXT:    csel x10, x8, x0, ls
-; CHECK-NEXT:    stlxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB35_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_umin_i64:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var64
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; INLINE_ATOMICS-NEXT:  .LBB35_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp x8, x0
+; INLINE_ATOMICS-NEXT:    csel x10, x8, x0, ls
+; INLINE_ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB35_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov x0, x8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umin_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:  .LBB35_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE_ATOMICS-NEXT:    csel x10, x8, x0, ls
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB35_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov x0, x8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_umin_i64:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var64
+; LSE-NEXT:    add x8, x8, :lo12:var64
+; LSE-NEXT:    lduminal x0, x0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw umin ptr @var64, i64 %offset acq_rel
    ret i64 %old
 }
 
 define dso_local i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umax_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var8
-; CHECK-NEXT:    add x8, x8, :lo12:var8
-; CHECK-NEXT:    and w9, w0, #0xff
-; CHECK-NEXT:  .LBB36_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxrb w0, [x8]
-; CHECK-NEXT:    cmp w0, w9
-; CHECK-NEXT:    csel w10, w0, w9, hi
-; CHECK-NEXT:    stlxrb w11, w10, [x8]
-; CHECK-NEXT:    cbnz w11, .LBB36_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_umax_i8:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x8, var8
+; INLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; INLINE_ATOMICS-NEXT:    and w9, w0, #0xff
+; INLINE_ATOMICS-NEXT:  .LBB36_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxrb w0, [x8]
+; INLINE_ATOMICS-NEXT:    cmp w0, w9
+; INLINE_ATOMICS-NEXT:    csel w10, w0, w9, hi
+; INLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x8]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB36_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umax_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x8, var8
+; OUTLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:    and w9, w0, #0xff
+; OUTLINE_ATOMICS-NEXT:  .LBB36_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxrb w0, [x8]
+; OUTLINE_ATOMICS-NEXT:    cmp w0, w9
+; OUTLINE_ATOMICS-NEXT:    csel w10, w0, w9, hi
+; OUTLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x8]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB36_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_umax_i8:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var8
+; LSE-NEXT:    add x8, x8, :lo12:var8
+; LSE-NEXT:    ldumaxalb w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset acq_rel
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umax_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, var16
-; CHECK-NEXT:    add x8, x8, :lo12:var16
-; CHECK-NEXT:    and w9, w0, #0xffff
-; CHECK-NEXT:  .LBB37_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrh w0, [x8]
-; CHECK-NEXT:    cmp w0, w9
-; CHECK-NEXT:    csel w10, w0, w9, hi
-; CHECK-NEXT:    stxrh w11, w10, [x8]
-; CHECK-NEXT:    cbnz w11, .LBB37_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_umax_i16:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x8, var16
+; INLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var16
+; INLINE_ATOMICS-NEXT:    and w9, w0, #0xffff
+; INLINE_ATOMICS-NEXT:  .LBB37_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldxrh w0, [x8]
+; INLINE_ATOMICS-NEXT:    cmp w0, w9
+; INLINE_ATOMICS-NEXT:    csel w10, w0, w9, hi
+; INLINE_ATOMICS-NEXT:    stxrh w11, w10, [x8]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB37_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umax_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x8, var16
+; OUTLINE_ATOMICS-NEXT:    add x8, x8, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:    and w9, w0, #0xffff
+; OUTLINE_ATOMICS-NEXT:  .LBB37_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxrh w0, [x8]
+; OUTLINE_ATOMICS-NEXT:    cmp w0, w9
+; OUTLINE_ATOMICS-NEXT:    csel w10, w0, w9, hi
+; OUTLINE_ATOMICS-NEXT:    stxrh w11, w10, [x8]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB37_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_umax_i16:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var16
+; LSE-NEXT:    add x8, x8, :lo12:var16
+; LSE-NEXT:    ldumaxh w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset monotonic
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umax_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB38_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    cmp w8, w0
-; CHECK-NEXT:    csel w10, w8, w0, hi
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB38_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_umax_i32:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var32
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; INLINE_ATOMICS-NEXT:  .LBB38_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp w8, w0
+; INLINE_ATOMICS-NEXT:    csel w10, w8, w0, hi
+; INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB38_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umax_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB38_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE_ATOMICS-NEXT:    csel w10, w8, w0, hi
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB38_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_umax_i32:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var32
+; LSE-NEXT:    add x8, x8, :lo12:var32
+; LSE-NEXT:    ldumaxal w0, w0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw umax ptr @var32, i32 %offset seq_cst
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_umax_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB39_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxr x8, [x9]
-; CHECK-NEXT:    cmp x8, x0
-; CHECK-NEXT:    csel x10, x8, x0, hi
-; CHECK-NEXT:    stlxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB39_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_umax_i64:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var64
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; INLINE_ATOMICS-NEXT:  .LBB39_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldxr x8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp x8, x0
+; INLINE_ATOMICS-NEXT:    csel x10, x8, x0, hi
+; INLINE_ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB39_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov x0, x8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_umax_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:  .LBB39_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxr x8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE_ATOMICS-NEXT:    csel x10, x8, x0, hi
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, x10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB39_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov x0, x8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_umax_i64:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var64
+; LSE-NEXT:    add x8, x8, :lo12:var64
+; LSE-NEXT:    ldumaxl x0, x0, [x8]
+; LSE-NEXT:    ret
    %old = atomicrmw umax ptr @var64, i64 %offset release
    ret i64 %old
 }
 
 define dso_local i8 @test_atomic_load_uinc_wrap_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_uinc_wrap_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var8
-; CHECK-NEXT:    add x9, x9, :lo12:var8
-; CHECK-NEXT:  .LBB40_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrb w8, [x9]
-; CHECK-NEXT:    cmp w8, w0, uxtb
-; CHECK-NEXT:    csinc w10, wzr, w8, hs
-; CHECK-NEXT:    stlxrb w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB40_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_uinc_wrap_i8:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var8
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; INLINE_ATOMICS-NEXT:  .LBB40_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldxrb w8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp w8, w0, uxtb
+; INLINE_ATOMICS-NEXT:    csinc w10, wzr, w8, hs
+; INLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB40_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_uinc_wrap_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:  .LBB40_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxrb w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE_ATOMICS-NEXT:    csinc w10, wzr, w8, hs
+; OUTLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB40_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_uinc_wrap_i8:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var8
+; LSE-NEXT:    adrp x9, var8
+; LSE-NEXT:    add x9, x9, :lo12:var8
+; LSE-NEXT:    ldrb w8, [x8, :lo12:var8]
+; LSE-NEXT:  .LBB40_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    and w10, w8, #0xff
+; LSE-NEXT:    cmp w10, w0, uxtb
+; LSE-NEXT:    csinc w11, wzr, w8, hs
+; LSE-NEXT:    caslb w8, w11, [x9]
+; LSE-NEXT:    cmp w8, w10
+; LSE-NEXT:    b.ne .LBB40_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    mov w0, w8
+; LSE-NEXT:    ret
    %old = atomicrmw uinc_wrap ptr @var8, i8 %offset release
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_uinc_wrap_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_uinc_wrap_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var16
-; CHECK-NEXT:    add x9, x9, :lo12:var16
-; CHECK-NEXT:  .LBB41_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrh w8, [x9]
-; CHECK-NEXT:    cmp w8, w0, uxth
-; CHECK-NEXT:    csinc w10, wzr, w8, hs
-; CHECK-NEXT:    stxrh w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB41_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_uinc_wrap_i16:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var16
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; INLINE_ATOMICS-NEXT:  .LBB41_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldxrh w8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp w8, w0, uxth
+; INLINE_ATOMICS-NEXT:    csinc w10, wzr, w8, hs
+; INLINE_ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB41_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_uinc_wrap_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:  .LBB41_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxrh w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE_ATOMICS-NEXT:    csinc w10, wzr, w8, hs
+; OUTLINE_ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB41_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_uinc_wrap_i16:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var16
+; LSE-NEXT:    adrp x9, var16
+; LSE-NEXT:    add x9, x9, :lo12:var16
+; LSE-NEXT:    ldrh w8, [x8, :lo12:var16]
+; LSE-NEXT:  .LBB41_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    and w10, w8, #0xffff
+; LSE-NEXT:    cmp w10, w0, uxth
+; LSE-NEXT:    csinc w11, wzr, w8, hs
+; LSE-NEXT:    cash w8, w11, [x9]
+; LSE-NEXT:    cmp w8, w10
+; LSE-NEXT:    b.ne .LBB41_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    mov w0, w8
+; LSE-NEXT:    ret
    %old = atomicrmw uinc_wrap ptr @var16, i16 %offset monotonic
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_uinc_wrap_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_uinc_wrap_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB42_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    cmp w8, w0
-; CHECK-NEXT:    csinc w10, wzr, w8, hs
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB42_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_uinc_wrap_i32:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var32
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; INLINE_ATOMICS-NEXT:  .LBB42_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp w8, w0
+; INLINE_ATOMICS-NEXT:    csinc w10, wzr, w8, hs
+; INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB42_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_uinc_wrap_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB42_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE_ATOMICS-NEXT:    csinc w10, wzr, w8, hs
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB42_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_uinc_wrap_i32:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var32
+; LSE-NEXT:    adrp x9, var32
+; LSE-NEXT:    add x9, x9, :lo12:var32
+; LSE-NEXT:    ldr w8, [x8, :lo12:var32]
+; LSE-NEXT:    mov w10, w8
+; LSE-NEXT:  .LBB42_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    cmp w8, w0
+; LSE-NEXT:    csinc w11, wzr, w8, hs
+; LSE-NEXT:    casal w10, w11, [x9]
+; LSE-NEXT:    cmp w10, w8
+; LSE-NEXT:    mov w8, w10
+; LSE-NEXT:    b.ne .LBB42_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    mov w0, w8
+; LSE-NEXT:    ret
    %old = atomicrmw uinc_wrap ptr @var32, i32 %offset seq_cst
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_uinc_wrap_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_uinc_wrap_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB43_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr x8, [x9]
-; CHECK-NEXT:    cmp x8, x0
-; CHECK-NEXT:    csinc x10, xzr, x8, hs
-; CHECK-NEXT:    stxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB43_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_uinc_wrap_i64:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var64
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; INLINE_ATOMICS-NEXT:  .LBB43_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp x8, x0
+; INLINE_ATOMICS-NEXT:    csinc x10, xzr, x8, hs
+; INLINE_ATOMICS-NEXT:    stxr w11, x10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB43_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov x0, x8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_uinc_wrap_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:  .LBB43_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE_ATOMICS-NEXT:    csinc x10, xzr, x8, hs
+; OUTLINE_ATOMICS-NEXT:    stxr w11, x10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB43_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov x0, x8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_uinc_wrap_i64:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var64
+; LSE-NEXT:    adrp x9, var64
+; LSE-NEXT:    add x9, x9, :lo12:var64
+; LSE-NEXT:    ldr x8, [x8, :lo12:var64]
+; LSE-NEXT:    mov x10, x8
+; LSE-NEXT:  .LBB43_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    cmp x8, x0
+; LSE-NEXT:    csinc x11, xzr, x8, hs
+; LSE-NEXT:    casa x10, x11, [x9]
+; LSE-NEXT:    cmp x10, x8
+; LSE-NEXT:    mov x8, x10
+; LSE-NEXT:    b.ne .LBB43_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    mov x0, x8
+; LSE-NEXT:    ret
    %old = atomicrmw uinc_wrap ptr @var64, i64 %offset acquire
    ret i64 %old
 }
 
 define dso_local i8 @test_atomic_load_udec_wrap_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_udec_wrap_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var8
-; CHECK-NEXT:    add x9, x9, :lo12:var8
-; CHECK-NEXT:  .LBB44_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrb w8, [x9]
-; CHECK-NEXT:    cmp w8, w0, uxtb
-; CHECK-NEXT:    sub w10, w8, #1
-; CHECK-NEXT:    ccmp w8, #0, #4, ls
-; CHECK-NEXT:    csel w10, w0, w10, eq
-; CHECK-NEXT:    stlxrb w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB44_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_udec_wrap_i8:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var8
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; INLINE_ATOMICS-NEXT:  .LBB44_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldxrb w8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp w8, w0, uxtb
+; INLINE_ATOMICS-NEXT:    sub w10, w8, #1
+; INLINE_ATOMICS-NEXT:    ccmp w8, #0, #4, ls
+; INLINE_ATOMICS-NEXT:    csel w10, w0, w10, eq
+; INLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB44_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_udec_wrap_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:  .LBB44_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxrb w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE_ATOMICS-NEXT:    sub w10, w8, #1
+; OUTLINE_ATOMICS-NEXT:    ccmp w8, #0, #4, ls
+; OUTLINE_ATOMICS-NEXT:    csel w10, w0, w10, eq
+; OUTLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB44_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_udec_wrap_i8:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var8
+; LSE-NEXT:    adrp x9, var8
+; LSE-NEXT:    add x9, x9, :lo12:var8
+; LSE-NEXT:    ldrb w8, [x8, :lo12:var8]
+; LSE-NEXT:  .LBB44_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    and w10, w8, #0xff
+; LSE-NEXT:    sub w11, w8, #1
+; LSE-NEXT:    cmp w10, w0, uxtb
+; LSE-NEXT:    ccmp w10, #0, #4, ls
+; LSE-NEXT:    csel w11, w0, w11, eq
+; LSE-NEXT:    caslb w8, w11, [x9]
+; LSE-NEXT:    cmp w8, w10
+; LSE-NEXT:    b.ne .LBB44_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    mov w0, w8
+; LSE-NEXT:    ret
    %old = atomicrmw udec_wrap ptr @var8, i8 %offset release
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_udec_wrap_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_udec_wrap_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var16
-; CHECK-NEXT:    add x9, x9, :lo12:var16
-; CHECK-NEXT:  .LBB45_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrh w8, [x9]
-; CHECK-NEXT:    cmp w8, w0, uxth
-; CHECK-NEXT:    sub w10, w8, #1
-; CHECK-NEXT:    ccmp w8, #0, #4, ls
-; CHECK-NEXT:    csel w10, w0, w10, eq
-; CHECK-NEXT:    stxrh w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB45_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_udec_wrap_i16:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var16
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; INLINE_ATOMICS-NEXT:  .LBB45_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldxrh w8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp w8, w0, uxth
+; INLINE_ATOMICS-NEXT:    sub w10, w8, #1
+; INLINE_ATOMICS-NEXT:    ccmp w8, #0, #4, ls
+; INLINE_ATOMICS-NEXT:    csel w10, w0, w10, eq
+; INLINE_ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB45_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_udec_wrap_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:  .LBB45_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxrh w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE_ATOMICS-NEXT:    sub w10, w8, #1
+; OUTLINE_ATOMICS-NEXT:    ccmp w8, #0, #4, ls
+; OUTLINE_ATOMICS-NEXT:    csel w10, w0, w10, eq
+; OUTLINE_ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB45_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_udec_wrap_i16:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var16
+; LSE-NEXT:    adrp x9, var16
+; LSE-NEXT:    add x9, x9, :lo12:var16
+; LSE-NEXT:    ldrh w8, [x8, :lo12:var16]
+; LSE-NEXT:  .LBB45_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    and w10, w8, #0xffff
+; LSE-NEXT:    sub w11, w8, #1
+; LSE-NEXT:    cmp w10, w0, uxth
+; LSE-NEXT:    ccmp w10, #0, #4, ls
+; LSE-NEXT:    csel w11, w0, w11, eq
+; LSE-NEXT:    cash w8, w11, [x9]
+; LSE-NEXT:    cmp w8, w10
+; LSE-NEXT:    b.ne .LBB45_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    mov w0, w8
+; LSE-NEXT:    ret
    %old = atomicrmw udec_wrap ptr @var16, i16 %offset monotonic
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_udec_wrap_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_udec_wrap_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB46_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    cmp w8, w0
-; CHECK-NEXT:    sub w10, w8, #1
-; CHECK-NEXT:    ccmp w8, #0, #4, ls
-; CHECK-NEXT:    csel w10, w0, w10, eq
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB46_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_udec_wrap_i32:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var32
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; INLINE_ATOMICS-NEXT:  .LBB46_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp w8, w0
+; INLINE_ATOMICS-NEXT:    sub w10, w8, #1
+; INLINE_ATOMICS-NEXT:    ccmp w8, #0, #4, ls
+; INLINE_ATOMICS-NEXT:    csel w10, w0, w10, eq
+; INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB46_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_udec_wrap_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB46_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0
+; OUTLINE_ATOMICS-NEXT:    sub w10, w8, #1
+; OUTLINE_ATOMICS-NEXT:    ccmp w8, #0, #4, ls
+; OUTLINE_ATOMICS-NEXT:    csel w10, w0, w10, eq
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB46_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_udec_wrap_i32:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var32
+; LSE-NEXT:    adrp x9, var32
+; LSE-NEXT:    add x9, x9, :lo12:var32
+; LSE-NEXT:    ldr w8, [x8, :lo12:var32]
+; LSE-NEXT:    mov w10, w8
+; LSE-NEXT:  .LBB46_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    cmp w8, w0
+; LSE-NEXT:    sub w11, w8, #1
+; LSE-NEXT:    ccmp w8, #0, #4, ls
+; LSE-NEXT:    csel w11, w0, w11, eq
+; LSE-NEXT:    casal w10, w11, [x9]
+; LSE-NEXT:    cmp w10, w8
+; LSE-NEXT:    mov w8, w10
+; LSE-NEXT:    b.ne .LBB46_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    mov w0, w8
+; LSE-NEXT:    ret
    %old = atomicrmw udec_wrap ptr @var32, i32 %offset seq_cst
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_udec_wrap_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_udec_wrap_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB47_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr x8, [x9]
-; CHECK-NEXT:    cmp x8, x0
-; CHECK-NEXT:    sub x10, x8, #1
-; CHECK-NEXT:    ccmp x8, #0, #4, ls
-; CHECK-NEXT:    csel x10, x0, x10, eq
-; CHECK-NEXT:    stxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB47_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_udec_wrap_i64:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var64
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; INLINE_ATOMICS-NEXT:  .LBB47_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; INLINE_ATOMICS-NEXT:    cmp x8, x0
+; INLINE_ATOMICS-NEXT:    sub x10, x8, #1
+; INLINE_ATOMICS-NEXT:    ccmp x8, #0, #4, ls
+; INLINE_ATOMICS-NEXT:    csel x10, x0, x10, eq
+; INLINE_ATOMICS-NEXT:    stxr w11, x10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB47_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov x0, x8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_udec_wrap_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:  .LBB47_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE_ATOMICS-NEXT:    cmp x8, x0
+; OUTLINE_ATOMICS-NEXT:    sub x10, x8, #1
+; OUTLINE_ATOMICS-NEXT:    ccmp x8, #0, #4, ls
+; OUTLINE_ATOMICS-NEXT:    csel x10, x0, x10, eq
+; OUTLINE_ATOMICS-NEXT:    stxr w11, x10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB47_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov x0, x8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_udec_wrap_i64:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var64
+; LSE-NEXT:    adrp x9, var64
+; LSE-NEXT:    add x9, x9, :lo12:var64
+; LSE-NEXT:    ldr x8, [x8, :lo12:var64]
+; LSE-NEXT:    mov x10, x8
+; LSE-NEXT:  .LBB47_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    cmp x8, x0
+; LSE-NEXT:    sub x11, x8, #1
+; LSE-NEXT:    ccmp x8, #0, #4, ls
+; LSE-NEXT:    csel x11, x0, x11, eq
+; LSE-NEXT:    casa x10, x11, [x9]
+; LSE-NEXT:    cmp x10, x8
+; LSE-NEXT:    mov x8, x10
+; LSE-NEXT:    b.ne .LBB47_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    mov x0, x8
+; LSE-NEXT:    ret
    %old = atomicrmw udec_wrap ptr @var64, i64 %offset acquire
    ret i64 %old
 }
 
 define dso_local i8 @test_atomic_load_usub_cond_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_usub_cond_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var8
-; CHECK-NEXT:    add x9, x9, :lo12:var8
-; CHECK-NEXT:  .LBB48_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrb w8, [x9]
-; CHECK-NEXT:    sub w10, w8, w0
-; CHECK-NEXT:    cmp w8, w0, uxtb
-; CHECK-NEXT:    csel w10, w10, w8, hs
-; CHECK-NEXT:    stlxrb w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB48_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_usub_cond_i8:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var8
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; INLINE_ATOMICS-NEXT:  .LBB48_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldxrb w8, [x9]
+; INLINE_ATOMICS-NEXT:    sub w10, w8, w0
+; INLINE_ATOMICS-NEXT:    cmp w8, w0, uxtb
+; INLINE_ATOMICS-NEXT:    csel w10, w10, w8, hs
+; INLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB48_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_usub_cond_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:  .LBB48_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxrb w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    sub w10, w8, w0
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, uxtb
+; OUTLINE_ATOMICS-NEXT:    csel w10, w10, w8, hs
+; OUTLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB48_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_usub_cond_i8:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var8
+; LSE-NEXT:    adrp x9, var8
+; LSE-NEXT:    add x9, x9, :lo12:var8
+; LSE-NEXT:    ldrb w8, [x8, :lo12:var8]
+; LSE-NEXT:  .LBB48_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    and w10, w8, #0xff
+; LSE-NEXT:    sub w11, w8, w0
+; LSE-NEXT:    cmp w10, w0, uxtb
+; LSE-NEXT:    csel w11, w11, w8, hs
+; LSE-NEXT:    caslb w8, w11, [x9]
+; LSE-NEXT:    cmp w8, w10
+; LSE-NEXT:    b.ne .LBB48_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    mov w0, w8
+; LSE-NEXT:    ret
    %old = atomicrmw usub_cond ptr @var8, i8 %offset release
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_usub_cond_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_usub_cond_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var16
-; CHECK-NEXT:    add x9, x9, :lo12:var16
-; CHECK-NEXT:  .LBB49_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrh w8, [x9]
-; CHECK-NEXT:    sub w10, w8, w0
-; CHECK-NEXT:    cmp w8, w0, uxth
-; CHECK-NEXT:    csel w10, w10, w8, hs
-; CHECK-NEXT:    stxrh w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB49_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_usub_cond_i16:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var16
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; INLINE_ATOMICS-NEXT:  .LBB49_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldxrh w8, [x9]
+; INLINE_ATOMICS-NEXT:    sub w10, w8, w0
+; INLINE_ATOMICS-NEXT:    cmp w8, w0, uxth
+; INLINE_ATOMICS-NEXT:    csel w10, w10, w8, hs
+; INLINE_ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB49_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_usub_cond_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:  .LBB49_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxrh w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    sub w10, w8, w0
+; OUTLINE_ATOMICS-NEXT:    cmp w8, w0, uxth
+; OUTLINE_ATOMICS-NEXT:    csel w10, w10, w8, hs
+; OUTLINE_ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB49_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_usub_cond_i16:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var16
+; LSE-NEXT:    adrp x9, var16
+; LSE-NEXT:    add x9, x9, :lo12:var16
+; LSE-NEXT:    ldrh w8, [x8, :lo12:var16]
+; LSE-NEXT:  .LBB49_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    and w10, w8, #0xffff
+; LSE-NEXT:    sub w11, w8, w0
+; LSE-NEXT:    cmp w10, w0, uxth
+; LSE-NEXT:    csel w11, w11, w8, hs
+; LSE-NEXT:    cash w8, w11, [x9]
+; LSE-NEXT:    cmp w8, w10
+; LSE-NEXT:    b.ne .LBB49_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    mov w0, w8
+; LSE-NEXT:    ret
    %old = atomicrmw usub_cond ptr @var16, i16 %offset monotonic
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_usub_cond_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_usub_cond_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB50_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    subs w10, w8, w0
-; CHECK-NEXT:    csel w10, w10, w8, hs
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB50_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_usub_cond_i32:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var32
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; INLINE_ATOMICS-NEXT:  .LBB50_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; INLINE_ATOMICS-NEXT:    subs w10, w8, w0
+; INLINE_ATOMICS-NEXT:    csel w10, w10, w8, hs
+; INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB50_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_usub_cond_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB50_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    subs w10, w8, w0
+; OUTLINE_ATOMICS-NEXT:    csel w10, w10, w8, hs
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB50_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_usub_cond_i32:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var32
+; LSE-NEXT:    adrp x9, var32
+; LSE-NEXT:    add x9, x9, :lo12:var32
+; LSE-NEXT:    ldr w8, [x8, :lo12:var32]
+; LSE-NEXT:    mov w10, w8
+; LSE-NEXT:  .LBB50_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    subs w11, w8, w0
+; LSE-NEXT:    csel w11, w11, w8, hs
+; LSE-NEXT:    casal w10, w11, [x9]
+; LSE-NEXT:    cmp w10, w8
+; LSE-NEXT:    mov w8, w10
+; LSE-NEXT:    b.ne .LBB50_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    mov w0, w8
+; LSE-NEXT:    ret
    %old = atomicrmw usub_cond ptr @var32, i32 %offset seq_cst
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_usub_cond_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_usub_cond_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB51_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr x8, [x9]
-; CHECK-NEXT:    subs x10, x8, x0
-; CHECK-NEXT:    csel x10, x10, x8, hs
-; CHECK-NEXT:    stxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB51_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_usub_cond_i64:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var64
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; INLINE_ATOMICS-NEXT:  .LBB51_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; INLINE_ATOMICS-NEXT:    subs x10, x8, x0
+; INLINE_ATOMICS-NEXT:    csel x10, x10, x8, hs
+; INLINE_ATOMICS-NEXT:    stxr w11, x10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB51_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov x0, x8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_usub_cond_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:  .LBB51_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE_ATOMICS-NEXT:    subs x10, x8, x0
+; OUTLINE_ATOMICS-NEXT:    csel x10, x10, x8, hs
+; OUTLINE_ATOMICS-NEXT:    stxr w11, x10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB51_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov x0, x8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_usub_cond_i64:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var64
+; LSE-NEXT:    adrp x9, var64
+; LSE-NEXT:    add x9, x9, :lo12:var64
+; LSE-NEXT:    ldr x8, [x8, :lo12:var64]
+; LSE-NEXT:    mov x10, x8
+; LSE-NEXT:  .LBB51_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    subs x11, x8, x0
+; LSE-NEXT:    csel x11, x11, x8, hs
+; LSE-NEXT:    casa x10, x11, [x9]
+; LSE-NEXT:    cmp x10, x8
+; LSE-NEXT:    mov x8, x10
+; LSE-NEXT:    b.ne .LBB51_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    mov x0, x8
+; LSE-NEXT:    ret
    %old = atomicrmw usub_cond ptr @var64, i64 %offset acquire
    ret i64 %old
 }
 
 define dso_local i8 @test_atomic_load_usub_sat_i8(i8 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_usub_sat_i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var8
-; CHECK-NEXT:    add x9, x9, :lo12:var8
-; CHECK-NEXT:  .LBB52_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrb w8, [x9]
-; CHECK-NEXT:    subs w10, w8, w0, uxtb
-; CHECK-NEXT:    csel w10, wzr, w10, lo
-; CHECK-NEXT:    stlxrb w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB52_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_usub_sat_i8:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var8
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; INLINE_ATOMICS-NEXT:  .LBB52_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldxrb w8, [x9]
+; INLINE_ATOMICS-NEXT:    subs w10, w8, w0, uxtb
+; INLINE_ATOMICS-NEXT:    csel w10, wzr, w10, lo
+; INLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB52_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_usub_sat_i8:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var8
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
+; OUTLINE_ATOMICS-NEXT:  .LBB52_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxrb w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    subs w10, w8, w0, uxtb
+; OUTLINE_ATOMICS-NEXT:    csel w10, wzr, w10, lo
+; OUTLINE_ATOMICS-NEXT:    stlxrb w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB52_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_usub_sat_i8:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var8
+; LSE-NEXT:    adrp x9, var8
+; LSE-NEXT:    add x9, x9, :lo12:var8
+; LSE-NEXT:    ldrb w8, [x8, :lo12:var8]
+; LSE-NEXT:  .LBB52_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    and w10, w8, #0xff
+; LSE-NEXT:    subs w11, w10, w0, uxtb
+; LSE-NEXT:    csel w11, wzr, w11, lo
+; LSE-NEXT:    caslb w8, w11, [x9]
+; LSE-NEXT:    cmp w8, w10
+; LSE-NEXT:    b.ne .LBB52_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    mov w0, w8
+; LSE-NEXT:    ret
    %old = atomicrmw usub_sat ptr @var8, i8 %offset release
    ret i8 %old
 }
 
 define dso_local i16 @test_atomic_load_usub_sat_i16(i16 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_usub_sat_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var16
-; CHECK-NEXT:    add x9, x9, :lo12:var16
-; CHECK-NEXT:  .LBB53_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldxrh w8, [x9]
-; CHECK-NEXT:    subs w10, w8, w0, uxth
-; CHECK-NEXT:    csel w10, wzr, w10, lo
-; CHECK-NEXT:    stxrh w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB53_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_usub_sat_i16:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var16
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; INLINE_ATOMICS-NEXT:  .LBB53_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldxrh w8, [x9]
+; INLINE_ATOMICS-NEXT:    subs w10, w8, w0, uxth
+; INLINE_ATOMICS-NEXT:    csel w10, wzr, w10, lo
+; INLINE_ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB53_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_usub_sat_i16:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var16
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
+; OUTLINE_ATOMICS-NEXT:  .LBB53_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldxrh w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    subs w10, w8, w0, uxth
+; OUTLINE_ATOMICS-NEXT:    csel w10, wzr, w10, lo
+; OUTLINE_ATOMICS-NEXT:    stxrh w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB53_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_usub_sat_i16:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var16
+; LSE-NEXT:    adrp x9, var16
+; LSE-NEXT:    add x9, x9, :lo12:var16
+; LSE-NEXT:    ldrh w8, [x8, :lo12:var16]
+; LSE-NEXT:  .LBB53_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    and w10, w8, #0xffff
+; LSE-NEXT:    subs w11, w10, w0, uxth
+; LSE-NEXT:    csel w11, wzr, w11, lo
+; LSE-NEXT:    cash w8, w11, [x9]
+; LSE-NEXT:    cmp w8, w10
+; LSE-NEXT:    b.ne .LBB53_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    mov w0, w8
+; LSE-NEXT:    ret
    %old = atomicrmw usub_sat ptr @var16, i16 %offset monotonic
    ret i16 %old
 }
 
 define dso_local i32 @test_atomic_load_usub_sat_i32(i32 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_usub_sat_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var32
-; CHECK-NEXT:    add x9, x9, :lo12:var32
-; CHECK-NEXT:  .LBB54_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w8, [x9]
-; CHECK-NEXT:    subs w10, w8, w0
-; CHECK-NEXT:    csel w10, wzr, w10, lo
-; CHECK-NEXT:    stlxr w11, w10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB54_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov w0, w8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_usub_sat_i32:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var32
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; INLINE_ATOMICS-NEXT:  .LBB54_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; INLINE_ATOMICS-NEXT:    subs w10, w8, w0
+; INLINE_ATOMICS-NEXT:    csel w10, wzr, w10, lo
+; INLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB54_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov w0, w8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_usub_sat_i32:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var32
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var32
+; OUTLINE_ATOMICS-NEXT:  .LBB54_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr w8, [x9]
+; OUTLINE_ATOMICS-NEXT:    subs w10, w8, w0
+; OUTLINE_ATOMICS-NEXT:    csel w10, wzr, w10, lo
+; OUTLINE_ATOMICS-NEXT:    stlxr w11, w10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB54_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov w0, w8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_usub_sat_i32:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var32
+; LSE-NEXT:    adrp x9, var32
+; LSE-NEXT:    add x9, x9, :lo12:var32
+; LSE-NEXT:    ldr w8, [x8, :lo12:var32]
+; LSE-NEXT:    mov w10, w8
+; LSE-NEXT:  .LBB54_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    subs w11, w8, w0
+; LSE-NEXT:    csel w11, wzr, w11, lo
+; LSE-NEXT:    casal w10, w11, [x9]
+; LSE-NEXT:    cmp w10, w8
+; LSE-NEXT:    mov w8, w10
+; LSE-NEXT:    b.ne .LBB54_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    mov w0, w8
+; LSE-NEXT:    ret
    %old = atomicrmw usub_sat ptr @var32, i32 %offset seq_cst
    ret i32 %old
 }
 
 define dso_local i64 @test_atomic_load_usub_sat_i64(i64 %offset) nounwind {
-; CHECK-LABEL: test_atomic_load_usub_sat_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, var64
-; CHECK-NEXT:    add x9, x9, :lo12:var64
-; CHECK-NEXT:  .LBB55_1: // %atomicrmw.start
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr x8, [x9]
-; CHECK-NEXT:    subs x10, x8, x0
-; CHECK-NEXT:    csel x10, xzr, x10, lo
-; CHECK-NEXT:    stxr w11, x10, [x9]
-; CHECK-NEXT:    cbnz w11, .LBB55_1
-; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
-; CHECK-NEXT:    mov x0, x8
-; CHECK-NEXT:    ret
+; INLINE_ATOMICS-LABEL: test_atomic_load_usub_sat_i64:
+; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    adrp x9, var64
+; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; INLINE_ATOMICS-NEXT:  .LBB55_1: // %atomicrmw.start
+; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; INLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; INLINE_ATOMICS-NEXT:    subs x10, x8, x0
+; INLINE_ATOMICS-NEXT:    csel x10, xzr, x10, lo
+; INLINE_ATOMICS-NEXT:    stxr w11, x10, [x9]
+; INLINE_ATOMICS-NEXT:    cbnz w11, .LBB55_1
+; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    mov x0, x8
+; INLINE_ATOMICS-NEXT:    ret
+;
+; OUTLINE_ATOMICS-LABEL: test_atomic_load_usub_sat_i64:
+; OUTLINE_ATOMICS:       // %bb.0:
+; OUTLINE_ATOMICS-NEXT:    adrp x9, var64
+; OUTLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var64
+; OUTLINE_ATOMICS-NEXT:  .LBB55_1: // %atomicrmw.start
+; OUTLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
+; OUTLINE_ATOMICS-NEXT:    ldaxr x8, [x9]
+; OUTLINE_ATOMICS-NEXT:    subs x10, x8, x0
+; OUTLINE_ATOMICS-NEXT:    csel x10, xzr, x10, lo
+; OUTLINE_ATOMICS-NEXT:    stxr w11, x10, [x9]
+; OUTLINE_ATOMICS-NEXT:    cbnz w11, .LBB55_1
+; OUTLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE_ATOMICS-NEXT:    mov x0, x8
+; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_load_usub_sat_i64:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var64
+; LSE-NEXT:    adrp x9, var64
+; LSE-NEXT:    add x9, x9, :lo12:var64
+; LSE-NEXT:    ldr x8, [x8, :lo12:var64]
+; LSE-NEXT:    mov x10, x8
+; LSE-NEXT:  .LBB55_1: // %atomicrmw.start
+; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; LSE-NEXT:    subs x11, x8, x0
+; LSE-NEXT:    csel x11, xzr, x11, lo
+; LSE-NEXT:    casa x10, x11, [x9]
+; LSE-NEXT:    cmp x10, x8
+; LSE-NEXT:    mov x8, x10
+; LSE-NEXT:    b.ne .LBB55_1
+; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    mov x0, x8
+; LSE-NEXT:    ret
    %old = atomicrmw usub_sat ptr @var64, i64 %offset acquire
    ret i64 %old
 }
@@ -1318,6 +2411,13 @@ define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_cas1_acq
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_cmpxchg_i8:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var8
+; LSE-NEXT:    add x8, x8, :lo12:var8
+; LSE-NEXT:    casab w0, w1, [x8]
+; LSE-NEXT:    ret
    %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new acquire acquire
    %old = extractvalue { i8, i1 } %pair, 0
    ret i8 %old
@@ -1354,6 +2454,13 @@ define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_cas2_acq_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_cmpxchg_i16:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var16
+; LSE-NEXT:    add x8, x8, :lo12:var16
+; LSE-NEXT:    casalh w0, w1, [x8]
+; LSE-NEXT:    ret
    %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new seq_cst seq_cst
    %old = extractvalue { i16, i1 } %pair, 0
    ret i16 %old
@@ -1390,6 +2497,13 @@ define dso_local i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    bl __aarch64_cas4_rel
 ; OUTLINE_ATOMICS-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_cmpxchg_i32:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var32
+; LSE-NEXT:    add x8, x8, :lo12:var32
+; LSE-NEXT:    casl w0, w1, [x8]
+; LSE-NEXT:    ret
    %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new release monotonic
    %old = extractvalue { i32, i1 } %pair, 0
    ret i32 %old
@@ -1427,6 +2541,14 @@ define dso_local void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; OUTLINE_ATOMICS-NEXT:    str x0, [x19]
 ; OUTLINE_ATOMICS-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
 ; OUTLINE_ATOMICS-NEXT:    ret
+;
+; LSE-LABEL: test_atomic_cmpxchg_i64:
+; LSE:       // %bb.0:
+; LSE-NEXT:    adrp x8, var64
+; LSE-NEXT:    add x8, x8, :lo12:var64
+; LSE-NEXT:    cas x0, x1, [x8]
+; LSE-NEXT:    str x0, [x8]
+; LSE-NEXT:    ret
    %pair = cmpxchg ptr @var64, i64 %wanted, i64 %new monotonic monotonic
    %old = extractvalue { i64, i1 } %pair, 0
    store i64 %old, ptr @var64


        


More information about the llvm-commits mailing list