[llvm] [AArch64] Enable dead register definitions at O0 (PR #145174)

via llvm-commits llvm-commits at lists.llvm.org
Sat Jun 21 11:42:10 PDT 2025


https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/145174

>From 80e63f42c99832aa85f7a0289376203330bbf2d3 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 21 Jun 2025 11:45:34 -0400
Subject: [PATCH 1/3] Pre-commit test

---
 llvm/test/CodeGen/AArch64/fast-isel-O0-cmp | 38 ++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/fast-isel-O0-cmp

diff --git a/llvm/test/CodeGen/AArch64/fast-isel-O0-cmp b/llvm/test/CodeGen/AArch64/fast-isel-O0-cmp
new file mode 100644
index 0000000000000..88033a831511f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fast-isel-O0-cmp
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
+
+; Even at -O0, a compare should be printed as the cmp alias rather than subs.
+define i1 @cmp(i32 %0) {
+; CHECK-LABEL: cmp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w0, #5
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    // kill: def $w1 killed $w0
+; CHECK-NEXT:    ret
+  %2 = icmp sgt i32 %0, 5
+  ret i1 %2
+}
+
+define i1 @cmn(i32 %0) {
+; CHECK-LABEL: cmn:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adds w8, w0, #5
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    // kill: def $w1 killed $w0
+; CHECK-NEXT:    ret
+  %2 = icmp sgt i32 %0, -5
+  ret i1 %2
+}
+
+; Test that a comparison against 0 is also printed as cmp.
+define i1 @cmp0(i32 %0) {
+; CHECK-LABEL: cmp0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w0, #0
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    // kill: def $w1 killed $w0
+; CHECK-NEXT:    ret
+  %2 = icmp sgt i32 %0, 0
+  ret i1 %2
+}
+

>From b055639eea4c0f6acbcfc526b0427e0e3ebf8b58 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 21 Jun 2025 11:55:10 -0400
Subject: [PATCH 2/3] Enable dead register definitions at O0

Without this pass, the assembler/disassembler output at -O0 shows these instructions as adds or subs rather than as the cmp or cmn aliases. Even gcc does this at -O0. The pass only rewrites dead register definitions to use the zero register. Will this optimize -O0 code? At most it may slightly reduce register pressure, but that is not the point. The point is that these instructions should resolve to their aliases so the output is easier to understand, which is why -O0 exists to begin with.
---
 llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 5 +++--
 llvm/test/CodeGen/AArch64/fast-isel-O0-cmp       | 6 +++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 8150e91c8ba52..9fe46181db819 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -803,8 +803,9 @@ bool AArch64PassConfig::addILPOpts() {
 
 void AArch64PassConfig::addPreRegAlloc() {
   // Change dead register definitions to refer to the zero register.
-  if (TM->getOptLevel() != CodeGenOptLevel::None &&
-      EnableDeadRegisterElimination)
+  // This is beneficial even at -O0 as we can show CMP/CMN in the assembler
+  // output.
+  if (EnableDeadRegisterElimination)
     addPass(createAArch64DeadRegisterDefinitions());
 
   // Use AdvSIMD scalar instructions whenever profitable.
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-O0-cmp b/llvm/test/CodeGen/AArch64/fast-isel-O0-cmp
index 88033a831511f..e5d97df40db25 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-O0-cmp
+++ b/llvm/test/CodeGen/AArch64/fast-isel-O0-cmp
@@ -5,7 +5,7 @@
 define i1 @cmp(i32 %0) {
 ; CHECK-LABEL: cmp:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs w8, w0, #5
+; CHECK-NEXT:    cmp w0, #5
 ; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    // kill: def $w1 killed $w0
 ; CHECK-NEXT:    ret
@@ -16,7 +16,7 @@ define i1 @cmp(i32 %0) {
 define i1 @cmn(i32 %0) {
 ; CHECK-LABEL: cmn:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adds w8, w0, #5
+; CHECK-NEXT:    cmn w0, #5
 ; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    // kill: def $w1 killed $w0
 ; CHECK-NEXT:    ret
@@ -28,7 +28,7 @@ define i1 @cmn(i32 %0) {
 define i1 @cmp0(i32 %0) {
 ; CHECK-LABEL: cmp0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    subs w8, w0, #0
+; CHECK-NEXT:    cmp w0, #0
 ; CHECK-NEXT:    cset w0, gt
 ; CHECK-NEXT:    // kill: def $w1 killed $w0
 ; CHECK-NEXT:    ret

>From 186d9a7a56a7e8d6d5ac3cc26fc0b5f4e1ef9980 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 21 Jun 2025 14:41:48 -0400
Subject: [PATCH 3/3] ok

---
 .../aarch64-atomic-store-outline_atomics.ll   |    8 +-
 .../Atomics/aarch64-atomic-store-rcpc.ll      |    8 +-
 .../Atomics/aarch64-atomic-store-v8_1a.ll     |    8 +-
 .../Atomics/aarch64-atomic-store-v8a.ll       |    8 +-
 .../AArch64/Atomics/aarch64-atomicrmw-lse2.ll | 3050 ++++++++---------
 .../Atomics/aarch64-atomicrmw-lse2_lse128.ll  |  600 ++--
 .../AArch64/Atomics/aarch64-atomicrmw-lsfe.ll |  150 +-
 .../aarch64-atomicrmw-outline_atomics.ll      | 1030 +++---
 .../AArch64/Atomics/aarch64-atomicrmw-rcpc.ll | 3050 ++++++++---------
 .../Atomics/aarch64-atomicrmw-rcpc3.ll        | 3050 ++++++++---------
 .../Atomics/aarch64-atomicrmw-v8_1a.ll        |  630 ++--
 .../AArch64/Atomics/aarch64-atomicrmw-v8a.ll  | 3050 ++++++++---------
 .../Atomics/aarch64-atomicrmw-v8a_fp.ll       |  900 ++---
 .../AArch64/Atomics/aarch64-cmpxchg-lse2.ll   |  600 ++--
 .../AArch64/Atomics/aarch64-cmpxchg-rcpc.ll   |  600 ++--
 .../AArch64/Atomics/aarch64-cmpxchg-rcpc3.ll  |  600 ++--
 .../AArch64/Atomics/aarch64-cmpxchg-v8a.ll    |  600 ++--
 ...aarch64_be-atomic-store-outline_atomics.ll |    8 +-
 .../Atomics/aarch64_be-atomic-store-rcpc.ll   |    8 +-
 .../Atomics/aarch64_be-atomic-store-v8_1a.ll  |    8 +-
 .../Atomics/aarch64_be-atomic-store-v8a.ll    |    8 +-
 .../Atomics/aarch64_be-atomicrmw-lse2.ll      | 2840 +++++++--------
 .../aarch64_be-atomicrmw-lse2_lse128.ll       |  330 +-
 .../Atomics/aarch64_be-atomicrmw-lsfe.ll      |  120 +-
 .../aarch64_be-atomicrmw-outline_atomics.ll   |  760 ++--
 .../Atomics/aarch64_be-atomicrmw-rcpc.ll      | 2840 +++++++--------
 .../Atomics/aarch64_be-atomicrmw-rcpc3.ll     | 2840 +++++++--------
 .../Atomics/aarch64_be-atomicrmw-v8_1a.ll     |  360 +-
 .../Atomics/aarch64_be-atomicrmw-v8a.ll       | 2840 +++++++--------
 .../Atomics/aarch64_be-atomicrmw-v8a_fp.ll    |  720 ++--
 .../Atomics/aarch64_be-cmpxchg-lse2.ll        |  600 ++--
 .../Atomics/aarch64_be-cmpxchg-rcpc.ll        |  600 ++--
 .../Atomics/aarch64_be-cmpxchg-rcpc3.ll       |  600 ++--
 .../AArch64/Atomics/aarch64_be-cmpxchg-v8a.ll |  600 ++--
 .../AArch64/GlobalISel/arm64-atomic.ll        |  786 ++---
 llvm/test/CodeGen/AArch64/O0-pipeline.ll      |    1 +
 .../arm64-fast-isel-conversion-fallback.ll    |   10 +-
 llvm/test/CodeGen/AArch64/arm64-fast-isel.ll  |   41 +-
 llvm/test/CodeGen/AArch64/atomicrmw-O0.ll     |  116 +-
 .../test/CodeGen/AArch64/br-cond-not-merge.ll |  145 +
 llvm/test/CodeGen/AArch64/cmpxchg-O0.ll       |  289 +-
 .../AArch64/fast-isel-atomic-fallback.ll      |   12 +-
 .../test/CodeGen/AArch64/fast-isel-cmpxchg.ll |   50 +
 .../CodeGen/AArch64/misched-fusion-cmp-bcc.ll |   11 +
 .../CodeGen/AArch64/sme-aarch64-svcount.ll    |    2 +-
 45 files changed, 17658 insertions(+), 17829 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-outline_atomics.ll
index e594561010464..af942dcd34bbc 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-outline_atomics.ll
@@ -121,7 +121,7 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_unordered:
 ; -O1:    ldxp xzr, x8, [x2]
@@ -136,7 +136,7 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_monotonic:
 ; -O1:    ldxp xzr, x8, [x2]
@@ -151,7 +151,7 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_release:
 ; -O1:    ldxp xzr, x8, [x2]
@@ -166,7 +166,7 @@ define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr)
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
 ; -O1:    ldaxp xzr, x8, [x2]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc.ll
index c8971b1b877bd..948ce41770c09 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc.ll
@@ -125,7 +125,7 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_unordered:
 ; -O1:    ldxp xzr, x8, [x2]
@@ -144,7 +144,7 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_monotonic:
 ; -O1:    ldxp xzr, x8, [x2]
@@ -163,7 +163,7 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_release:
 ; -O1:    ldxp xzr, x8, [x2]
@@ -182,7 +182,7 @@ define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
 ; -O1:    ldaxp xzr, x8, [x2]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-v8_1a.ll
index 00bcedf4b7a64..943092795aa8f 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-v8_1a.ll
@@ -121,7 +121,7 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_unordered:
 ; -O1:    ldp x4, x5, [x2]
@@ -138,7 +138,7 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x2]
@@ -155,7 +155,7 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x2]
@@ -172,7 +172,7 @@ define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x2]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-v8a.ll
index 250ae1b99e887..ddaa8437185df 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-v8a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-v8a.ll
@@ -125,7 +125,7 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_unordered:
 ; -O1:    ldxp xzr, x8, [x2]
@@ -144,7 +144,7 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_monotonic:
 ; -O1:    ldxp xzr, x8, [x2]
@@ -163,7 +163,7 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_release:
 ; -O1:    ldxp xzr, x8, [x2]
@@ -182,7 +182,7 @@ define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
 ; -O1:    ldaxp xzr, x8, [x2]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
index 94d46148f37e3..a1242100818e7 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
@@ -5,11 +5,11 @@
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
 ; -O1:    ldxrb w0, [x8]
@@ -20,11 +20,11 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_acquire:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
 ; -O1:    ldaxrb w0, [x8]
@@ -35,11 +35,11 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_release:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
 ; -O1:    ldxrb w0, [x8]
@@ -50,11 +50,11 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w0, [x8]
@@ -65,11 +65,11 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w0, [x8]
@@ -80,10 +80,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
 ; -O1:    ldxrh w0, [x8]
@@ -94,10 +94,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_acquire:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
 ; -O1:    ldaxrh w0, [x8]
@@ -108,10 +108,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_release:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
 ; -O1:    ldxrh w0, [x8]
@@ -122,10 +122,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w0, [x8]
@@ -136,10 +136,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w0, [x8]
@@ -150,10 +150,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_monotonic:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_monotonic:
 ; -O1:    ldxr w0, [x8]
@@ -164,10 +164,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_acquire:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_acquire:
 ; -O1:    ldaxr w0, [x8]
@@ -178,10 +178,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_release:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_release:
 ; -O1:    ldxr w0, [x8]
@@ -192,10 +192,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_acq_rel:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_acq_rel:
 ; -O1:    ldaxr w0, [x8]
@@ -206,10 +206,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_seq_cst:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_seq_cst:
 ; -O1:    ldaxr w0, [x8]
@@ -220,10 +220,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_monotonic:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -234,10 +234,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_acquire:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -248,10 +248,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_release:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -262,10 +262,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_acq_rel:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -276,10 +276,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_seq_cst:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -298,7 +298,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
 ; -O1:    ldxp x8, x1, [x0]
@@ -317,7 +317,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire:
 ; -O1:    ldaxp x8, x1, [x0]
@@ -336,7 +336,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_release:
 ; -O1:    ldxp x8, x1, [x0]
@@ -355,7 +355,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
 ; -O1:    ldaxp x8, x1, [x0]
@@ -374,7 +374,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
 ; -O1:    ldaxp x8, x1, [x0]
@@ -385,11 +385,11 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
 ; -O1:    ldxrb w0, [x8]
@@ -400,11 +400,11 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
 ; -O1:    ldaxrb w0, [x8]
@@ -415,11 +415,11 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_release:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
 ; -O1:    ldxrb w0, [x8]
@@ -430,11 +430,11 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w0, [x8]
@@ -445,11 +445,11 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w0, [x8]
@@ -600,12 +600,12 @@ define dso_local i128 @atomicrmw_xchg_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_add_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_monotonic:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -617,12 +617,12 @@ define dso_local i8 @atomicrmw_add_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_acquire:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -634,12 +634,12 @@ define dso_local i8 @atomicrmw_add_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_release:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -651,12 +651,12 @@ define dso_local i8 @atomicrmw_add_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_acq_rel:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -668,12 +668,12 @@ define dso_local i8 @atomicrmw_add_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_seq_cst:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -685,11 +685,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_monotonic:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -701,11 +701,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_add_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_acquire:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -717,11 +717,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_release:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -733,11 +733,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_acq_rel:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -749,11 +749,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_seq_cst:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -765,11 +765,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_monotonic:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -781,11 +781,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_add_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_acquire:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -797,11 +797,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_release:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -813,11 +813,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_acq_rel:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -829,11 +829,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_seq_cst:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -845,11 +845,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_monotonic:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -861,11 +861,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_add_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_acquire:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -877,11 +877,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_release:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -893,11 +893,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_acq_rel:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -909,11 +909,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_seq_cst:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -926,7 +926,7 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -935,7 +935,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -948,7 +948,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -957,7 +957,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -970,7 +970,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -979,7 +979,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -992,7 +992,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1001,7 +1001,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1014,7 +1014,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1023,7 +1023,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1035,12 +1035,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_monotonic:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1052,12 +1052,12 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_acquire:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1069,12 +1069,12 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_release:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1086,12 +1086,12 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_acq_rel:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1103,12 +1103,12 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_seq_cst:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -1301,7 +1301,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -1315,7 +1315,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -1329,7 +1329,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -1343,7 +1343,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -1357,7 +1357,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1370,12 +1370,12 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_monotonic:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1387,12 +1387,12 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_acquire:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1404,12 +1404,12 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_release:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1421,12 +1421,12 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_acq_rel:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1438,12 +1438,12 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_seq_cst:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -1455,11 +1455,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_monotonic:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -1471,11 +1471,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_acquire:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -1487,11 +1487,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_release:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -1503,11 +1503,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_acq_rel:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -1519,11 +1519,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_seq_cst:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -1535,11 +1535,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_monotonic:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -1551,11 +1551,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_acquire:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -1567,11 +1567,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_release:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -1583,11 +1583,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_acq_rel:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -1599,11 +1599,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_seq_cst:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -1615,11 +1615,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_monotonic:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -1631,11 +1631,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_acquire:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -1647,11 +1647,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_release:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -1663,11 +1663,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_acq_rel:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -1679,11 +1679,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_seq_cst:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -1696,6 +1696,7 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1704,7 +1705,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1717,6 +1718,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1725,7 +1727,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1738,6 +1740,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1746,7 +1749,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1759,6 +1762,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1767,7 +1771,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1780,6 +1784,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1788,7 +1793,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1800,12 +1805,12 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_monotonic:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1817,12 +1822,12 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_acquire:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1834,12 +1839,12 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_release:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1851,12 +1856,12 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_acq_rel:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1868,12 +1873,12 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_seq_cst:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2066,6 +2071,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -2079,6 +2085,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -2092,6 +2099,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -2105,6 +2113,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -2118,6 +2127,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
@@ -2130,12 +2140,12 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_monotonic:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2147,12 +2157,12 @@ define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_acquire:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2164,12 +2174,12 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_release:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2181,12 +2191,12 @@ define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_acq_rel:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2198,12 +2208,12 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_seq_cst:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2215,11 +2225,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_monotonic:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -2231,11 +2241,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_acquire:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -2247,11 +2257,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_release:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -2263,11 +2273,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_acq_rel:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -2279,11 +2289,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_seq_cst:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -2295,11 +2305,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_monotonic:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -2311,11 +2321,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_acquire:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -2327,11 +2337,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_release:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -2343,11 +2353,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_acq_rel:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -2359,11 +2369,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_seq_cst:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -2375,11 +2385,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_monotonic:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -2391,11 +2401,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_acquire:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -2407,11 +2417,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_release:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -2423,11 +2433,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_acq_rel:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -2439,11 +2449,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_seq_cst:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -2465,7 +2475,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -2488,7 +2498,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2511,7 +2521,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -2534,7 +2544,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2557,7 +2567,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2570,12 +2580,12 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_monotonic:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2587,12 +2597,12 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_acquire:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2604,12 +2614,12 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_release:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2621,12 +2631,12 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2638,12 +2648,12 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2910,13 +2920,13 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_monotonic:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2929,13 +2939,13 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_acquire:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2948,13 +2958,13 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_release:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2967,13 +2977,13 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2986,13 +2996,13 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3006,11 +3016,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -3024,11 +3034,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -3042,11 +3052,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -3060,11 +3070,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -3078,11 +3088,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -3096,11 +3106,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -3114,11 +3124,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -3132,11 +3142,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -3150,11 +3160,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -3168,11 +3178,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -3186,11 +3196,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -3204,11 +3214,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -3222,11 +3232,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -3240,11 +3250,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -3258,11 +3268,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -3287,7 +3297,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -3314,7 +3324,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3341,7 +3351,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -3368,7 +3378,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3395,7 +3405,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3410,13 +3420,13 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -3429,13 +3439,13 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_acquire:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -3448,13 +3458,13 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_release:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -3467,13 +3477,13 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -3486,13 +3496,13 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3810,12 +3820,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_or_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_monotonic:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -3827,12 +3837,12 @@ define dso_local i8 @atomicrmw_or_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_acquire:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -3844,12 +3854,12 @@ define dso_local i8 @atomicrmw_or_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_release:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -3861,12 +3871,12 @@ define dso_local i8 @atomicrmw_or_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_acq_rel:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -3878,12 +3888,12 @@ define dso_local i8 @atomicrmw_or_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_seq_cst:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3895,11 +3905,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_monotonic:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -3911,11 +3921,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_acquire:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -3927,11 +3937,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_release:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -3943,11 +3953,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_acq_rel:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -3959,11 +3969,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_seq_cst:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -3975,11 +3985,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_monotonic:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -3991,11 +4001,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_acquire:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -4007,11 +4017,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_release:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -4023,11 +4033,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_acq_rel:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -4039,11 +4049,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_seq_cst:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -4055,11 +4065,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_monotonic:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -4071,11 +4081,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_acquire:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -4087,11 +4097,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_release:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -4103,11 +4113,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_acq_rel:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4119,11 +4129,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_seq_cst:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4145,7 +4155,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4168,7 +4178,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4191,7 +4201,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4214,7 +4224,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4237,7 +4247,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4250,12 +4260,12 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_monotonic:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4267,12 +4277,12 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_acquire:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4284,12 +4294,12 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_release:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4301,12 +4311,12 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_acq_rel:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4318,12 +4328,12 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_seq_cst:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -4590,12 +4600,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_monotonic:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4607,12 +4617,12 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_acquire:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4624,12 +4634,12 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_release:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4641,12 +4651,12 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_acq_rel:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4658,12 +4668,12 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_seq_cst:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -4675,11 +4685,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_monotonic:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -4691,11 +4701,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_acquire:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -4707,11 +4717,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_release:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -4723,11 +4733,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_acq_rel:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -4739,11 +4749,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_seq_cst:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -4755,11 +4765,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_monotonic:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -4771,11 +4781,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_acquire:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -4787,11 +4797,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_release:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -4803,11 +4813,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_acq_rel:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -4819,11 +4829,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_seq_cst:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -4835,11 +4845,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_monotonic:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -4851,11 +4861,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_acquire:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -4867,11 +4877,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_release:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -4883,11 +4893,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_acq_rel:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4899,11 +4909,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_seq_cst:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4925,7 +4935,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4948,7 +4958,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4971,7 +4981,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4994,7 +5004,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5017,7 +5027,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5030,12 +5040,12 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_monotonic:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -5047,12 +5057,12 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_acquire:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -5064,12 +5074,12 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_release:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -5081,12 +5091,12 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_acq_rel:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -5098,12 +5108,12 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_seq_cst:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -5370,14 +5380,14 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5391,14 +5401,14 @@ define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5412,14 +5422,14 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_release:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5433,14 +5443,14 @@ define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5454,14 +5464,14 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -5475,13 +5485,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -5495,13 +5505,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -5515,13 +5525,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_release:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -5535,13 +5545,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -5555,13 +5565,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -5575,12 +5585,12 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -5593,12 +5603,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -5611,12 +5621,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_release:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -5629,12 +5639,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -5647,12 +5657,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -5665,12 +5675,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -5683,12 +5693,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -5701,12 +5711,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_release:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -5719,12 +5729,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -5737,12 +5747,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -5755,13 +5765,11 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5771,7 +5779,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -5785,13 +5793,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5801,7 +5807,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5815,13 +5821,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5831,7 +5835,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -5845,13 +5849,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5861,7 +5863,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5875,13 +5877,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5891,7 +5891,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5905,14 +5905,14 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5926,14 +5926,14 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5947,14 +5947,14 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5968,14 +5968,14 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5989,14 +5989,14 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6011,7 +6011,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6027,7 +6027,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6043,7 +6043,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6059,7 +6059,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6075,7 +6075,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6090,7 +6090,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6104,7 +6104,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6118,7 +6118,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6132,7 +6132,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6146,7 +6146,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6160,7 +6160,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6174,7 +6174,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6188,7 +6188,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6202,7 +6202,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6216,7 +6216,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6230,13 +6230,11 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6252,13 +6250,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6274,13 +6270,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6296,13 +6290,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6318,13 +6310,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6340,14 +6330,14 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -6361,14 +6351,14 @@ define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -6382,14 +6372,14 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -6403,14 +6393,14 @@ define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -6424,14 +6414,14 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6445,13 +6435,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -6465,13 +6455,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -6485,13 +6475,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -6505,13 +6495,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -6525,13 +6515,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -6545,12 +6535,12 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -6563,12 +6553,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -6581,12 +6571,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -6599,12 +6589,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -6617,12 +6607,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -6635,12 +6625,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -6653,12 +6643,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -6671,12 +6661,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -6689,12 +6679,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -6707,12 +6697,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -6725,13 +6715,11 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6741,7 +6729,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -6755,13 +6743,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6771,7 +6757,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -6785,13 +6771,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6801,7 +6785,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -6815,13 +6799,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6831,7 +6813,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -6845,13 +6827,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6861,7 +6841,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -6875,14 +6855,14 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -6896,14 +6876,14 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -6917,14 +6897,14 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -6938,14 +6918,14 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -6959,14 +6939,14 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6981,7 +6961,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6997,7 +6977,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7013,7 +6993,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7029,7 +7009,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7045,7 +7025,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7060,7 +7040,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7074,7 +7054,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7088,7 +7068,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7102,7 +7082,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7116,7 +7096,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7130,7 +7110,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7144,7 +7124,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7158,7 +7138,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7172,7 +7152,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7186,7 +7166,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7200,13 +7180,11 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7222,13 +7200,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7244,13 +7220,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7266,13 +7240,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7288,13 +7260,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7310,14 +7280,14 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7331,14 +7301,14 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7352,14 +7322,14 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7373,14 +7343,14 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7394,14 +7364,14 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7415,12 +7385,12 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -7434,12 +7404,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -7453,12 +7423,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -7472,12 +7442,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -7491,12 +7461,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -7510,12 +7480,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -7528,12 +7498,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -7546,12 +7516,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -7564,12 +7534,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -7582,12 +7552,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -7600,12 +7570,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -7618,12 +7588,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -7636,12 +7606,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -7654,12 +7624,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -7672,12 +7642,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -7690,13 +7660,11 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7706,7 +7674,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -7720,13 +7688,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7736,7 +7702,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -7750,13 +7716,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7766,7 +7730,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -7780,13 +7744,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7796,7 +7758,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -7810,13 +7772,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7826,7 +7786,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -7840,14 +7800,14 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7861,14 +7821,14 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7882,14 +7842,14 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7903,14 +7863,14 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7924,14 +7884,14 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7945,7 +7905,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7960,7 +7920,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7975,7 +7935,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7990,7 +7950,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8005,7 +7965,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8020,7 +7980,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8034,7 +7994,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8048,7 +8008,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8062,7 +8022,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8076,7 +8036,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8090,7 +8050,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8104,7 +8064,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8118,7 +8078,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8132,7 +8092,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8146,7 +8106,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8160,13 +8120,11 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8182,13 +8140,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8204,13 +8160,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8226,13 +8180,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8248,13 +8200,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8270,14 +8220,14 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -8291,14 +8241,14 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -8312,14 +8262,14 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -8333,14 +8283,14 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -8354,14 +8304,14 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -8375,12 +8325,12 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -8394,12 +8344,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -8413,12 +8363,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -8432,12 +8382,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -8451,12 +8401,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -8470,12 +8420,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -8488,12 +8438,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -8506,12 +8456,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -8524,12 +8474,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -8542,12 +8492,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -8560,12 +8510,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -8578,12 +8528,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -8596,12 +8546,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -8614,12 +8564,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -8632,12 +8582,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -8650,13 +8600,11 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8666,7 +8614,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -8680,13 +8628,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8696,7 +8642,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -8710,13 +8656,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8726,7 +8670,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -8740,13 +8684,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8756,7 +8698,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -8770,13 +8712,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8786,7 +8726,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -8800,14 +8740,14 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -8821,14 +8761,14 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -8842,14 +8782,14 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -8863,14 +8803,14 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -8884,14 +8824,14 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -8905,7 +8845,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8920,7 +8860,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8935,7 +8875,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8950,7 +8890,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8965,7 +8905,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8980,7 +8920,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8994,7 +8934,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9008,7 +8948,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9022,7 +8962,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9036,7 +8976,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9050,7 +8990,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9064,7 +9004,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9078,7 +9018,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9092,7 +9032,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9106,7 +9046,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9120,13 +9060,11 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9142,13 +9080,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9164,13 +9100,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9186,13 +9120,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9208,13 +9140,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
index 1fad4a6b54f6b..e74ce4dd594d4 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
@@ -511,12 +511,12 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0:    adds x2, x9, x11
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    casp x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -531,12 +531,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0:    adds x2, x9, x11
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    caspa x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -551,12 +551,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0:    adds x2, x9, x11
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    caspl x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -571,12 +571,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0:    adds x2, x9, x11
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -591,12 +591,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0:    adds x2, x9, x11
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -826,7 +826,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -840,7 +840,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -854,7 +854,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -868,7 +868,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -882,7 +882,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1036,11 +1036,12 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O0:    subs x2, x9, x11
+; -O0:    cmp wzr, w11
 ; -O0:    casp x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -1055,11 +1056,12 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O0:    subs x2, x9, x11
+; -O0:    cmp wzr, w11
 ; -O0:    caspa x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -1074,11 +1076,12 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O0:    subs x2, x9, x11
+; -O0:    cmp wzr, w11
 ; -O0:    caspl x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -1093,11 +1096,12 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0:    subs x2, x9, x11
+; -O0:    cmp wzr, w11
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -1112,11 +1116,12 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0:    subs x2, x9, x11
+; -O0:    cmp wzr, w11
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -1346,6 +1351,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -1359,6 +1365,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -1372,6 +1379,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -1385,6 +1393,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -1398,6 +1407,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
@@ -1939,7 +1949,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    casb w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O1:    and w10, w8, w1
@@ -1956,7 +1966,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    casab w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O1:    and w10, w8, w1
@@ -1973,7 +1983,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    caslb w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O1:    and w10, w8, w1
@@ -1990,7 +2000,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    casalb w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O1:    and w10, w8, w1
@@ -2007,7 +2017,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    casalb w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O1:    and w10, w8, w1
@@ -2023,7 +2033,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value)
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    cash w9, w10, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O1:    and w10, w8, w1
@@ -2039,7 +2049,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    casah w9, w10, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O1:    and w10, w8, w1
@@ -2055,7 +2065,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    caslh w9, w10, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O1:    and w10, w8, w1
@@ -2071,7 +2081,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    casalh w9, w10, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O1:    and w10, w8, w1
@@ -2087,7 +2097,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    casalh w9, w10, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O1:    and w10, w8, w1
@@ -2103,7 +2113,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value)
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    cas w9, w10, [x11]
-; -O0:    subs w8, w9, w8
+; -O0:    cmp w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O1:    and w10, w8, w1
@@ -2119,7 +2129,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    casa w9, w10, [x11]
-; -O0:    subs w8, w9, w8
+; -O0:    cmp w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O1:    and w10, w8, w1
@@ -2135,7 +2145,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    casl w9, w10, [x11]
-; -O0:    subs w8, w9, w8
+; -O0:    cmp w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O1:    and w10, w8, w1
@@ -2151,7 +2161,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    casal w9, w10, [x11]
-; -O0:    subs w8, w9, w8
+; -O0:    cmp w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O1:    and w10, w8, w1
@@ -2167,7 +2177,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    casal w9, w10, [x11]
-; -O0:    subs w8, w9, w8
+; -O0:    cmp w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O1:    and w10, w8, w1
@@ -2183,7 +2193,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value)
 ; -O0:    and x9, x8, x9
 ; -O0:    mvn x10, x9
 ; -O0:    cas x9, x10, [x11]
-; -O0:    subs x8, x9, x8
+; -O0:    cmp x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O1:    and x10, x8, x1
@@ -2199,7 +2209,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0:    and x9, x8, x9
 ; -O0:    mvn x10, x9
 ; -O0:    casa x9, x10, [x11]
-; -O0:    subs x8, x9, x8
+; -O0:    cmp x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O1:    and x10, x8, x1
@@ -2215,7 +2225,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0:    and x9, x8, x9
 ; -O0:    mvn x10, x9
 ; -O0:    casl x9, x10, [x11]
-; -O0:    subs x8, x9, x8
+; -O0:    cmp x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O1:    and x10, x8, x1
@@ -2231,7 +2241,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0:    and x9, x8, x9
 ; -O0:    mvn x10, x9
 ; -O0:    casal x9, x10, [x11]
-; -O0:    subs x8, x9, x8
+; -O0:    cmp x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O1:    and x10, x8, x1
@@ -2247,7 +2257,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0:    and x9, x8, x9
 ; -O0:    mvn x10, x9
 ; -O0:    casal x9, x10, [x11]
-; -O0:    subs x8, x9, x8
+; -O0:    cmp x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O1:    and x10, x8, x1
@@ -2268,7 +2278,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -2293,7 +2303,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -2318,7 +2328,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -2343,7 +2353,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -2368,7 +2378,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -2389,7 +2399,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0:    mvn w8, w8
 ; -O0:    casb w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O1:    and w10, w8, w1
@@ -2406,7 +2416,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    casab w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O1:    and w10, w8, w1
@@ -2423,7 +2433,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    caslb w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O1:    and w10, w8, w1
@@ -2440,7 +2450,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    casalb w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O1:    and w10, w8, w1
@@ -2457,7 +2467,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    casalb w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O1:    and w10, w8, w1
@@ -3401,7 +3411,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -3422,7 +3432,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -3443,7 +3453,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -3464,7 +3474,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -3485,7 +3495,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -3930,19 +3940,17 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    casp x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -3958,19 +3966,17 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspa x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -3986,19 +3992,17 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspl x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -4014,19 +4018,17 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -4042,19 +4044,17 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -4106,7 +4106,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4122,7 +4122,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4138,7 +4138,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4154,7 +4154,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4170,7 +4170,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4185,7 +4185,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4199,7 +4199,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4213,7 +4213,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4227,7 +4227,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4241,7 +4241,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4255,7 +4255,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4269,7 +4269,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4283,7 +4283,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4297,7 +4297,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4311,7 +4311,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4325,13 +4325,11 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4347,13 +4345,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4369,13 +4365,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4391,13 +4385,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4413,13 +4405,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4575,19 +4565,17 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    casp x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -4603,19 +4591,17 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspa x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -4631,19 +4617,17 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspl x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -4659,19 +4643,17 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -4687,19 +4669,17 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -4751,7 +4731,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4767,7 +4747,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4783,7 +4763,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4799,7 +4779,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4815,7 +4795,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4830,7 +4810,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4844,7 +4824,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4858,7 +4838,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4872,7 +4852,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4886,7 +4866,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4900,7 +4880,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4914,7 +4894,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4928,7 +4908,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4942,7 +4922,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4956,7 +4936,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4970,13 +4950,11 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4992,13 +4970,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5014,13 +4990,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5036,13 +5010,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5058,13 +5030,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5220,19 +5190,17 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    casp x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -5248,19 +5216,17 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspa x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -5276,19 +5242,17 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspl x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -5304,19 +5268,17 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -5332,19 +5294,17 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -5395,7 +5355,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5410,7 +5370,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5425,7 +5385,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5440,7 +5400,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5455,7 +5415,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5470,7 +5430,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5484,7 +5444,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5498,7 +5458,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5512,7 +5472,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5526,7 +5486,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5540,7 +5500,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5554,7 +5514,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5568,7 +5528,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5582,7 +5542,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5596,7 +5556,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5610,13 +5570,11 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5632,13 +5590,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5654,13 +5610,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5676,13 +5630,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5698,13 +5650,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5860,19 +5810,17 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    casp x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -5888,19 +5836,17 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspa x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -5916,19 +5862,17 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspl x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -5944,19 +5888,17 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -5972,19 +5914,17 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -6035,7 +5975,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6050,7 +5990,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6065,7 +6005,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6080,7 +6020,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6095,7 +6035,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6110,7 +6050,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6124,7 +6064,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6138,7 +6078,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6152,7 +6092,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6166,7 +6106,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6180,7 +6120,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6194,7 +6134,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6208,7 +6148,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6222,7 +6162,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6236,7 +6176,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6250,13 +6190,11 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6272,13 +6210,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6294,13 +6230,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6316,13 +6250,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6338,13 +6270,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
index e0f2dd728c5e6..9e984c691f07c 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lsfe.ll
@@ -379,10 +379,10 @@ define dso_local double @atomicrmw_fadd_double_unaligned_seq_cst(ptr %ptr, doubl
 
 define dso_local half @atomicrmw_fsub_half_aligned_monotonic(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_monotonic:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -393,10 +393,10 @@ define dso_local half @atomicrmw_fsub_half_aligned_monotonic(ptr %ptr, half %val
 
 define dso_local half @atomicrmw_fsub_half_aligned_acquire(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_acquire:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -407,10 +407,10 @@ define dso_local half @atomicrmw_fsub_half_aligned_acquire(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fsub_half_aligned_release(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_release:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -421,10 +421,10 @@ define dso_local half @atomicrmw_fsub_half_aligned_release(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fsub_half_aligned_acq_rel(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_acq_rel:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -435,10 +435,10 @@ define dso_local half @atomicrmw_fsub_half_aligned_acq_rel(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fsub_half_aligned_seq_cst(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_seq_cst:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -451,10 +451,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_monotonic(ptr %ptr, bfloa
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_monotonic:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -469,10 +469,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_acquire(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_acquire:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -487,10 +487,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_release(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_release:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -505,10 +505,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_acq_rel(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_acq_rel:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -523,10 +523,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_seq_cst:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -539,10 +539,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 
 define dso_local float @atomicrmw_fsub_float_aligned_monotonic(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_monotonic:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -553,10 +553,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_monotonic(ptr %ptr, float %
 
 define dso_local float @atomicrmw_fsub_float_aligned_acquire(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_acquire:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -567,10 +567,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_acquire(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fsub_float_aligned_release(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_release:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -581,10 +581,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_release(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fsub_float_aligned_acq_rel(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_acq_rel:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -595,10 +595,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_acq_rel(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fsub_float_aligned_seq_cst(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_seq_cst:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -609,10 +609,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_seq_cst(ptr %ptr, float %va
 
 define dso_local double @atomicrmw_fsub_double_aligned_monotonic(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_monotonic:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_monotonic:
 ; -O1:    ldxr x8, [x0]
@@ -623,10 +623,10 @@ define dso_local double @atomicrmw_fsub_double_aligned_monotonic(ptr %ptr, doubl
 
 define dso_local double @atomicrmw_fsub_double_aligned_acquire(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_acquire:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_acquire:
 ; -O1:    ldaxr x8, [x0]
@@ -637,10 +637,10 @@ define dso_local double @atomicrmw_fsub_double_aligned_acquire(ptr %ptr, double
 
 define dso_local double @atomicrmw_fsub_double_aligned_release(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_release:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_release:
 ; -O1:    ldxr x8, [x0]
@@ -651,10 +651,10 @@ define dso_local double @atomicrmw_fsub_double_aligned_release(ptr %ptr, double
 
 define dso_local double @atomicrmw_fsub_double_aligned_acq_rel(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_acq_rel:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_acq_rel:
 ; -O1:    ldaxr x8, [x0]
@@ -665,10 +665,10 @@ define dso_local double @atomicrmw_fsub_double_aligned_acq_rel(ptr %ptr, double
 
 define dso_local double @atomicrmw_fsub_double_aligned_seq_cst(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_seq_cst:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_seq_cst:
 ; -O1:    ldaxr x8, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
index 4605bdd2f6073..3cd90341484ef 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
@@ -149,7 +149,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
 ; -O1:    ldxp x8, x1, [x0]
@@ -164,7 +164,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire:
 ; -O1:    ldaxp x8, x1, [x0]
@@ -179,7 +179,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_release:
 ; -O1:    ldxp x8, x1, [x0]
@@ -194,7 +194,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
 ; -O1:    ldaxp x8, x1, [x0]
@@ -209,7 +209,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
 ; -O1:    ldaxp x8, x1, [x0]
@@ -536,12 +536,12 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0:    adds x2, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    bl __aarch64_cas16_relax
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -554,12 +554,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0:    adds x2, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    bl __aarch64_cas16_acq
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -572,12 +572,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0:    adds x2, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    bl __aarch64_cas16_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -590,12 +590,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0:    adds x2, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    bl __aarch64_cas16_acq_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -608,12 +608,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0:    adds x2, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    bl __aarch64_cas16_acq_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -841,7 +841,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -855,7 +855,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -869,7 +869,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -883,7 +883,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -897,7 +897,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1131,11 +1131,12 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O0:    subs x2, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    bl __aarch64_cas16_relax
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1148,11 +1149,12 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O0:    subs x2, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    bl __aarch64_cas16_acq
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1165,11 +1167,12 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O0:    subs x2, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    bl __aarch64_cas16_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1182,11 +1185,12 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0:    subs x2, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    bl __aarch64_cas16_acq_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1199,11 +1203,12 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0:    subs x2, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    bl __aarch64_cas16_acq_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1451,6 +1456,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -1464,6 +1470,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -1477,6 +1484,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -1490,6 +1498,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -1503,6 +1512,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
@@ -1761,7 +1771,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1780,7 +1790,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1799,7 +1809,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1818,7 +1828,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1837,7 +1847,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2169,7 +2179,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_relax
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2186,7 +2196,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_acq
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2203,7 +2213,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2220,7 +2230,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2237,7 +2247,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2253,7 +2263,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value)
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas2_relax
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -2269,7 +2279,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas2_acq
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -2285,7 +2295,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas2_rel
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -2301,7 +2311,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -2317,7 +2327,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -2333,7 +2343,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value)
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas4_relax
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -2349,7 +2359,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas4_acq
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -2365,7 +2375,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas4_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -2381,7 +2391,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -2397,7 +2407,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -2413,7 +2423,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value)
 ; -O0:    and x8, x0, x8
 ; -O0:    mvn x1, x8
 ; -O0:    bl __aarch64_cas8_relax
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -2429,7 +2439,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0:    and x8, x0, x8
 ; -O0:    mvn x1, x8
 ; -O0:    bl __aarch64_cas8_acq
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -2445,7 +2455,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0:    and x8, x0, x8
 ; -O0:    mvn x1, x8
 ; -O0:    bl __aarch64_cas8_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -2461,7 +2471,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0:    and x8, x0, x8
 ; -O0:    mvn x1, x8
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -2477,7 +2487,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0:    and x8, x0, x8
 ; -O0:    mvn x1, x8
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -2498,7 +2508,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -2521,7 +2531,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2544,7 +2554,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -2567,7 +2577,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2590,7 +2600,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2609,7 +2619,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_relax
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2626,7 +2636,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_acq
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2643,7 +2653,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2660,7 +2670,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2677,7 +2687,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3141,7 +3151,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -3160,7 +3170,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3179,7 +3189,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -3198,7 +3208,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3217,7 +3227,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3666,7 +3676,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -3685,7 +3695,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3704,7 +3714,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -3723,7 +3733,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3742,7 +3752,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4046,11 +4056,11 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_relax
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -4065,11 +4075,11 @@ define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_acq
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -4084,11 +4094,11 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_release:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -4103,11 +4113,11 @@ define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -4122,11 +4132,11 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -4141,10 +4151,10 @@ define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas2_relax
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -4159,10 +4169,10 @@ define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas2_acq
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -4177,10 +4187,10 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_release:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas2_rel
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -4195,10 +4205,10 @@ define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -4213,10 +4223,10 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -4230,10 +4240,10 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas4_relax
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -4246,10 +4256,10 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas4_acq
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -4262,10 +4272,10 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_release:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas4_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -4278,10 +4288,10 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -4294,10 +4304,10 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -4310,10 +4320,10 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, gt
 ; -O0:    bl __aarch64_cas8_relax
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -4326,10 +4336,10 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, gt
 ; -O0:    bl __aarch64_cas8_acq
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -4342,10 +4352,10 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_release:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, gt
 ; -O0:    bl __aarch64_cas8_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -4358,10 +4368,10 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, gt
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4374,10 +4384,10 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, gt
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4390,19 +4400,17 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_relax
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4416,19 +4424,17 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_acq
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4442,19 +4448,17 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4468,19 +4472,17 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_acq_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4494,19 +4496,17 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_acq_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4521,11 +4521,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_relax
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -4540,11 +4540,11 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_acq
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -4559,11 +4559,11 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -4578,11 +4578,11 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -4597,11 +4597,11 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -4616,7 +4616,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4632,7 +4632,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4648,7 +4648,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4664,7 +4664,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4680,7 +4680,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4695,7 +4695,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4709,7 +4709,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4723,7 +4723,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4737,7 +4737,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4751,7 +4751,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4765,7 +4765,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4779,7 +4779,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4793,7 +4793,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4807,7 +4807,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4821,7 +4821,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4835,13 +4835,11 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4857,13 +4855,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4879,13 +4875,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4901,13 +4895,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4923,13 +4915,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4946,11 +4936,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_relax
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -4965,11 +4955,11 @@ define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_acq
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -4984,11 +4974,11 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5003,11 +4993,11 @@ define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5022,11 +5012,11 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -5041,10 +5031,10 @@ define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas2_relax
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -5059,10 +5049,10 @@ define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas2_acq
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -5077,10 +5067,10 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas2_rel
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -5095,10 +5085,10 @@ define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -5113,10 +5103,10 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -5130,10 +5120,10 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas4_relax
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -5146,10 +5136,10 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas4_acq
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -5162,10 +5152,10 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas4_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -5178,10 +5168,10 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -5194,10 +5184,10 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -5210,10 +5200,10 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, le
 ; -O0:    bl __aarch64_cas8_relax
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -5226,10 +5216,10 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, le
 ; -O0:    bl __aarch64_cas8_acq
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -5242,10 +5232,10 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, le
 ; -O0:    bl __aarch64_cas8_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -5258,10 +5248,10 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, le
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -5274,10 +5264,10 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, le
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -5290,19 +5280,17 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_relax
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -5316,19 +5304,17 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_acq
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5342,19 +5328,17 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -5368,19 +5352,17 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_acq_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5394,19 +5376,17 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_acq_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5421,11 +5401,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_relax
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5440,11 +5420,11 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_acq
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5459,11 +5439,11 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5478,11 +5458,11 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5497,11 +5477,11 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -5516,7 +5496,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5532,7 +5512,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5548,7 +5528,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5564,7 +5544,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5580,7 +5560,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5595,7 +5575,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5609,7 +5589,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5623,7 +5603,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5637,7 +5617,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5651,7 +5631,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5665,7 +5645,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5679,7 +5659,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5693,7 +5673,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5707,7 +5687,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5721,7 +5701,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5735,13 +5715,11 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5757,13 +5735,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5779,13 +5755,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5801,13 +5775,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5823,13 +5795,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5846,11 +5816,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_relax
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -5865,11 +5835,11 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_acq
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -5884,11 +5854,11 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -5903,11 +5873,11 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -5922,11 +5892,11 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -5940,10 +5910,10 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas2_relax
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -5957,10 +5927,10 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas2_acq
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -5974,10 +5944,10 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas2_rel
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -5991,10 +5961,10 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -6008,10 +5978,10 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -6025,10 +5995,10 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas4_relax
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -6041,10 +6011,10 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas4_acq
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -6057,10 +6027,10 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas4_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -6073,10 +6043,10 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -6089,10 +6059,10 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -6105,10 +6075,10 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, hi
 ; -O0:    bl __aarch64_cas8_relax
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -6121,10 +6091,10 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, hi
 ; -O0:    bl __aarch64_cas8_acq
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -6137,10 +6107,10 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, hi
 ; -O0:    bl __aarch64_cas8_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -6153,10 +6123,10 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, hi
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -6169,10 +6139,10 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, hi
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -6185,19 +6155,17 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_relax
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -6211,19 +6179,17 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_acq
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -6237,19 +6203,17 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -6263,19 +6227,17 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_acq_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -6289,19 +6251,17 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_acq_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -6316,11 +6276,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_relax
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -6335,11 +6295,11 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_acq
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -6354,11 +6314,11 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -6373,11 +6333,11 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -6392,11 +6352,11 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -6410,7 +6370,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6425,7 +6385,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6440,7 +6400,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6455,7 +6415,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6470,7 +6430,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6485,7 +6445,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6499,7 +6459,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6513,7 +6473,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6527,7 +6487,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6541,7 +6501,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6555,7 +6515,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6569,7 +6529,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6583,7 +6543,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6597,7 +6557,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6611,7 +6571,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6625,13 +6585,11 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6647,13 +6605,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6669,13 +6625,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6691,13 +6645,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6713,13 +6665,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6736,11 +6686,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_relax
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -6755,11 +6705,11 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_acq
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -6774,11 +6724,11 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -6793,11 +6743,11 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -6812,11 +6762,11 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -6830,10 +6780,10 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas2_relax
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -6847,10 +6797,10 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas2_acq
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -6864,10 +6814,10 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas2_rel
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -6881,10 +6831,10 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -6898,10 +6848,10 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -6915,10 +6865,10 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas4_relax
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -6931,10 +6881,10 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas4_acq
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -6947,10 +6897,10 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas4_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -6963,10 +6913,10 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -6979,10 +6929,10 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -6995,10 +6945,10 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, ls
 ; -O0:    bl __aarch64_cas8_relax
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -7011,10 +6961,10 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, ls
 ; -O0:    bl __aarch64_cas8_acq
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -7027,10 +6977,10 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, ls
 ; -O0:    bl __aarch64_cas8_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -7043,10 +6993,10 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, ls
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -7059,10 +7009,10 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, ls
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -7075,19 +7025,17 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_relax
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -7101,19 +7049,17 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_acq
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -7127,19 +7073,17 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -7153,19 +7097,17 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_acq_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -7179,19 +7121,17 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x11
-; -O0:    subs x10, x10, x9
-; -O0:    subs x12, x12, x9
+; -O0:    cmp x8, x11
+; -O0:    cmp x10, x9
+; -O0:    cmp x12, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w12, w10, #0x1
 ; -O0:    csel x2, x8, x11, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x3, x8, x9, ne
 ; -O0:    bl __aarch64_cas16_acq_rel
 ; -O0:    eor x8, x0, x8
 ; -O0:    eor x9, x1, x9
 ; -O0:    orr x8, x8, x9
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -7206,11 +7146,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_relax
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7225,11 +7165,11 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_acq
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7244,11 +7184,11 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7263,11 +7203,11 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7282,11 +7222,11 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_acq_rel
 ; -O0:    and w8, w0, #0xff
-; -O0:    subs w8, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7300,7 +7240,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7315,7 +7255,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7330,7 +7270,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7345,7 +7285,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7360,7 +7300,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7375,7 +7315,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7389,7 +7329,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7403,7 +7343,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7417,7 +7357,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7431,7 +7371,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7445,7 +7385,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7459,7 +7399,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7473,7 +7413,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7487,7 +7427,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7501,7 +7441,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7515,13 +7455,11 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7537,13 +7475,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7559,13 +7495,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7581,13 +7515,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7603,13 +7535,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
index 57cfeb78b6980..111c18774e393 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
@@ -5,11 +5,11 @@
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
 ; -O1:    ldxrb w0, [x8]
@@ -20,11 +20,11 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_acquire:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
 ; -O1:    ldaxrb w0, [x8]
@@ -35,11 +35,11 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_release:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
 ; -O1:    ldxrb w0, [x8]
@@ -50,11 +50,11 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w0, [x8]
@@ -65,11 +65,11 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w0, [x8]
@@ -80,10 +80,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
 ; -O1:    ldxrh w0, [x8]
@@ -94,10 +94,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_acquire:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
 ; -O1:    ldaxrh w0, [x8]
@@ -108,10 +108,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_release:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
 ; -O1:    ldxrh w0, [x8]
@@ -122,10 +122,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w0, [x8]
@@ -136,10 +136,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w0, [x8]
@@ -150,10 +150,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_monotonic:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_monotonic:
 ; -O1:    ldxr w0, [x8]
@@ -164,10 +164,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_acquire:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_acquire:
 ; -O1:    ldaxr w0, [x8]
@@ -178,10 +178,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_release:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_release:
 ; -O1:    ldxr w0, [x8]
@@ -192,10 +192,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_acq_rel:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_acq_rel:
 ; -O1:    ldaxr w0, [x8]
@@ -206,10 +206,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_seq_cst:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_seq_cst:
 ; -O1:    ldaxr w0, [x8]
@@ -220,10 +220,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_monotonic:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -234,10 +234,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_acquire:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -248,10 +248,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_release:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -262,10 +262,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_acq_rel:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -276,10 +276,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_seq_cst:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -298,7 +298,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
 ; -O1:    ldxp x8, x1, [x0]
@@ -317,7 +317,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire:
 ; -O1:    ldaxp x8, x1, [x0]
@@ -336,7 +336,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_release:
 ; -O1:    ldxp x8, x1, [x0]
@@ -355,7 +355,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
 ; -O1:    ldaxp x8, x1, [x0]
@@ -374,7 +374,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
 ; -O1:    ldaxp x8, x1, [x0]
@@ -385,11 +385,11 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
 ; -O1:    ldxrb w0, [x8]
@@ -400,11 +400,11 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
 ; -O1:    ldaxrb w0, [x8]
@@ -415,11 +415,11 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_release:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
 ; -O1:    ldxrb w0, [x8]
@@ -430,11 +430,11 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w0, [x8]
@@ -445,11 +445,11 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w0, [x8]
@@ -600,12 +600,12 @@ define dso_local i128 @atomicrmw_xchg_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_add_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_monotonic:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -617,12 +617,12 @@ define dso_local i8 @atomicrmw_add_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_acquire:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -634,12 +634,12 @@ define dso_local i8 @atomicrmw_add_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_release:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -651,12 +651,12 @@ define dso_local i8 @atomicrmw_add_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_acq_rel:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -668,12 +668,12 @@ define dso_local i8 @atomicrmw_add_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_seq_cst:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -685,11 +685,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_monotonic:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -701,11 +701,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_add_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_acquire:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -717,11 +717,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_release:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -733,11 +733,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_acq_rel:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -749,11 +749,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_seq_cst:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -765,11 +765,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_monotonic:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -781,11 +781,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_add_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_acquire:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -797,11 +797,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_release:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -813,11 +813,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_acq_rel:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -829,11 +829,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_seq_cst:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -845,11 +845,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_monotonic:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -861,11 +861,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_add_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_acquire:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -877,11 +877,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_release:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -893,11 +893,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_acq_rel:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -909,11 +909,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_seq_cst:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -926,7 +926,7 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -935,7 +935,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -948,7 +948,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -957,7 +957,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -970,7 +970,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -979,7 +979,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -992,7 +992,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1001,7 +1001,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1014,7 +1014,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1023,7 +1023,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1035,12 +1035,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_monotonic:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1052,12 +1052,12 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_acquire:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1069,12 +1069,12 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_release:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1086,12 +1086,12 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_acq_rel:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1103,12 +1103,12 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_seq_cst:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -1301,7 +1301,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -1315,7 +1315,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -1329,7 +1329,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -1343,7 +1343,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -1357,7 +1357,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1370,12 +1370,12 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_monotonic:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1387,12 +1387,12 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_acquire:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1404,12 +1404,12 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_release:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1421,12 +1421,12 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_acq_rel:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1438,12 +1438,12 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_seq_cst:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -1455,11 +1455,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_monotonic:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -1471,11 +1471,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_acquire:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -1487,11 +1487,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_release:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -1503,11 +1503,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_acq_rel:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -1519,11 +1519,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_seq_cst:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -1535,11 +1535,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_monotonic:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -1551,11 +1551,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_acquire:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -1567,11 +1567,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_release:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -1583,11 +1583,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_acq_rel:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -1599,11 +1599,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_seq_cst:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -1615,11 +1615,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_monotonic:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -1631,11 +1631,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_acquire:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -1647,11 +1647,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_release:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -1663,11 +1663,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_acq_rel:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -1679,11 +1679,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_seq_cst:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -1696,6 +1696,7 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1704,7 +1705,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1717,6 +1718,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1725,7 +1727,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1738,6 +1740,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1746,7 +1749,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1759,6 +1762,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1767,7 +1771,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1780,6 +1784,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1788,7 +1793,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1800,12 +1805,12 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_monotonic:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1817,12 +1822,12 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_acquire:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1834,12 +1839,12 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_release:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1851,12 +1856,12 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_acq_rel:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1868,12 +1873,12 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_seq_cst:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2066,6 +2071,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -2079,6 +2085,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -2092,6 +2099,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -2105,6 +2113,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -2118,6 +2127,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
@@ -2130,12 +2140,12 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_monotonic:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2147,12 +2157,12 @@ define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_acquire:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2164,12 +2174,12 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_release:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2181,12 +2191,12 @@ define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_acq_rel:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2198,12 +2208,12 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_seq_cst:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2215,11 +2225,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_monotonic:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -2231,11 +2241,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_acquire:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -2247,11 +2257,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_release:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -2263,11 +2273,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_acq_rel:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -2279,11 +2289,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_seq_cst:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -2295,11 +2305,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_monotonic:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -2311,11 +2321,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_acquire:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -2327,11 +2337,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_release:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -2343,11 +2353,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_acq_rel:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -2359,11 +2369,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_seq_cst:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -2375,11 +2385,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_monotonic:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -2391,11 +2401,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_acquire:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -2407,11 +2417,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_release:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -2423,11 +2433,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_acq_rel:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -2439,11 +2449,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_seq_cst:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -2465,7 +2475,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -2488,7 +2498,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2511,7 +2521,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -2534,7 +2544,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2557,7 +2567,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2570,12 +2580,12 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_monotonic:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2587,12 +2597,12 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_acquire:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2604,12 +2614,12 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_release:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2621,12 +2631,12 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2638,12 +2648,12 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2910,13 +2920,13 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_monotonic:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2929,13 +2939,13 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_acquire:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2948,13 +2958,13 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_release:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2967,13 +2977,13 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2986,13 +2996,13 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3006,11 +3016,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -3024,11 +3034,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -3042,11 +3052,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -3060,11 +3070,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -3078,11 +3088,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -3096,11 +3106,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -3114,11 +3124,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -3132,11 +3142,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -3150,11 +3160,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -3168,11 +3178,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -3186,11 +3196,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -3204,11 +3214,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -3222,11 +3232,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -3240,11 +3250,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -3258,11 +3268,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -3287,7 +3297,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -3314,7 +3324,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3341,7 +3351,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -3368,7 +3378,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3395,7 +3405,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3410,13 +3420,13 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -3429,13 +3439,13 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_acquire:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -3448,13 +3458,13 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_release:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -3467,13 +3477,13 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -3486,13 +3496,13 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3810,12 +3820,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_or_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_monotonic:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -3827,12 +3837,12 @@ define dso_local i8 @atomicrmw_or_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_acquire:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -3844,12 +3854,12 @@ define dso_local i8 @atomicrmw_or_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_release:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -3861,12 +3871,12 @@ define dso_local i8 @atomicrmw_or_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_acq_rel:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -3878,12 +3888,12 @@ define dso_local i8 @atomicrmw_or_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_seq_cst:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3895,11 +3905,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_monotonic:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -3911,11 +3921,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_acquire:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -3927,11 +3937,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_release:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -3943,11 +3953,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_acq_rel:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -3959,11 +3969,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_seq_cst:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -3975,11 +3985,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_monotonic:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -3991,11 +4001,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_acquire:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -4007,11 +4017,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_release:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -4023,11 +4033,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_acq_rel:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -4039,11 +4049,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_seq_cst:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -4055,11 +4065,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_monotonic:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -4071,11 +4081,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_acquire:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -4087,11 +4097,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_release:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -4103,11 +4113,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_acq_rel:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4119,11 +4129,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_seq_cst:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4145,7 +4155,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4168,7 +4178,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4191,7 +4201,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4214,7 +4224,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4237,7 +4247,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4250,12 +4260,12 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_monotonic:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4267,12 +4277,12 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_acquire:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4284,12 +4294,12 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_release:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4301,12 +4311,12 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_acq_rel:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4318,12 +4328,12 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_seq_cst:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -4590,12 +4600,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_monotonic:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4607,12 +4617,12 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_acquire:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4624,12 +4634,12 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_release:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4641,12 +4651,12 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_acq_rel:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4658,12 +4668,12 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_seq_cst:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -4675,11 +4685,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_monotonic:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -4691,11 +4701,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_acquire:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -4707,11 +4717,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_release:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -4723,11 +4733,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_acq_rel:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -4739,11 +4749,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_seq_cst:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -4755,11 +4765,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_monotonic:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -4771,11 +4781,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_acquire:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -4787,11 +4797,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_release:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -4803,11 +4813,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_acq_rel:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -4819,11 +4829,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_seq_cst:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -4835,11 +4845,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_monotonic:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -4851,11 +4861,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_acquire:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -4867,11 +4877,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_release:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -4883,11 +4893,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_acq_rel:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4899,11 +4909,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_seq_cst:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4925,7 +4935,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4948,7 +4958,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4971,7 +4981,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4994,7 +5004,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5017,7 +5027,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5030,12 +5040,12 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_monotonic:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -5047,12 +5057,12 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_acquire:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -5064,12 +5074,12 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_release:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -5081,12 +5091,12 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_acq_rel:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -5098,12 +5108,12 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_seq_cst:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -5370,14 +5380,14 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5391,14 +5401,14 @@ define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5412,14 +5422,14 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_release:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5433,14 +5443,14 @@ define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5454,14 +5464,14 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -5475,13 +5485,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -5495,13 +5505,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -5515,13 +5525,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_release:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -5535,13 +5545,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -5555,13 +5565,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -5575,12 +5585,12 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -5593,12 +5603,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -5611,12 +5621,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_release:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -5629,12 +5639,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -5647,12 +5657,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -5665,12 +5675,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -5683,12 +5693,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -5701,12 +5711,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_release:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -5719,12 +5729,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -5737,12 +5747,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -5755,13 +5765,11 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5771,7 +5779,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -5785,13 +5793,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5801,7 +5807,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5815,13 +5821,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5831,7 +5835,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -5845,13 +5849,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5861,7 +5863,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5875,13 +5877,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5891,7 +5891,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5905,14 +5905,14 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5926,14 +5926,14 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5947,14 +5947,14 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5968,14 +5968,14 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5989,14 +5989,14 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6011,7 +6011,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6027,7 +6027,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6043,7 +6043,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6059,7 +6059,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6075,7 +6075,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6090,7 +6090,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6104,7 +6104,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6118,7 +6118,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6132,7 +6132,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6146,7 +6146,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6160,7 +6160,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6174,7 +6174,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6188,7 +6188,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6202,7 +6202,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6216,7 +6216,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6230,13 +6230,11 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6252,13 +6250,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6274,13 +6270,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6296,13 +6290,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6318,13 +6310,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6340,14 +6330,14 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -6361,14 +6351,14 @@ define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -6382,14 +6372,14 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -6403,14 +6393,14 @@ define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -6424,14 +6414,14 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6445,13 +6435,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -6465,13 +6455,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -6485,13 +6475,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -6505,13 +6495,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -6525,13 +6515,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -6545,12 +6535,12 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -6563,12 +6553,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -6581,12 +6571,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -6599,12 +6589,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -6617,12 +6607,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -6635,12 +6625,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -6653,12 +6643,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -6671,12 +6661,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -6689,12 +6679,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -6707,12 +6697,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -6725,13 +6715,11 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6741,7 +6729,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -6755,13 +6743,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6771,7 +6757,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -6785,13 +6771,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6801,7 +6785,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -6815,13 +6799,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6831,7 +6813,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -6845,13 +6827,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6861,7 +6841,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -6875,14 +6855,14 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -6896,14 +6876,14 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -6917,14 +6897,14 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -6938,14 +6918,14 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -6959,14 +6939,14 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6981,7 +6961,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6997,7 +6977,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7013,7 +6993,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7029,7 +7009,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7045,7 +7025,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7060,7 +7040,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7074,7 +7054,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7088,7 +7068,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7102,7 +7082,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7116,7 +7096,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7130,7 +7110,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7144,7 +7124,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7158,7 +7138,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7172,7 +7152,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7186,7 +7166,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7200,13 +7180,11 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7222,13 +7200,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7244,13 +7220,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7266,13 +7240,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7288,13 +7260,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7310,14 +7280,14 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7331,14 +7301,14 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7352,14 +7322,14 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7373,14 +7343,14 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7394,14 +7364,14 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7415,12 +7385,12 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -7434,12 +7404,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -7453,12 +7423,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -7472,12 +7442,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -7491,12 +7461,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -7510,12 +7480,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -7528,12 +7498,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -7546,12 +7516,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -7564,12 +7534,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -7582,12 +7552,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -7600,12 +7570,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -7618,12 +7588,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -7636,12 +7606,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -7654,12 +7624,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -7672,12 +7642,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -7690,13 +7660,11 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7706,7 +7674,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -7720,13 +7688,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7736,7 +7702,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -7750,13 +7716,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7766,7 +7730,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -7780,13 +7744,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7796,7 +7758,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -7810,13 +7772,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7826,7 +7786,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -7840,14 +7800,14 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7861,14 +7821,14 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7882,14 +7842,14 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7903,14 +7863,14 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7924,14 +7884,14 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7945,7 +7905,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7960,7 +7920,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7975,7 +7935,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7990,7 +7950,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8005,7 +7965,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8020,7 +7980,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8034,7 +7994,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8048,7 +8008,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8062,7 +8022,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8076,7 +8036,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8090,7 +8050,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8104,7 +8064,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8118,7 +8078,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8132,7 +8092,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8146,7 +8106,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8160,13 +8120,11 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8182,13 +8140,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8204,13 +8160,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8226,13 +8180,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8248,13 +8200,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8270,14 +8220,14 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -8291,14 +8241,14 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -8312,14 +8262,14 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -8333,14 +8283,14 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -8354,14 +8304,14 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -8375,12 +8325,12 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -8394,12 +8344,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -8413,12 +8363,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -8432,12 +8382,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -8451,12 +8401,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -8470,12 +8420,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -8488,12 +8438,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -8506,12 +8456,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -8524,12 +8474,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -8542,12 +8492,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -8560,12 +8510,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -8578,12 +8528,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -8596,12 +8546,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -8614,12 +8564,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -8632,12 +8582,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -8650,13 +8600,11 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8666,7 +8614,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -8680,13 +8628,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8696,7 +8642,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -8710,13 +8656,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8726,7 +8670,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -8740,13 +8684,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8756,7 +8698,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -8770,13 +8712,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8786,7 +8726,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -8800,14 +8740,14 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -8821,14 +8761,14 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -8842,14 +8782,14 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -8863,14 +8803,14 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -8884,14 +8824,14 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -8905,7 +8845,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8920,7 +8860,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8935,7 +8875,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8950,7 +8890,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8965,7 +8905,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8980,7 +8920,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8994,7 +8934,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9008,7 +8948,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9022,7 +8962,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9036,7 +8976,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9050,7 +8990,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9064,7 +9004,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9078,7 +9018,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9092,7 +9032,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9106,7 +9046,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9120,13 +9060,11 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9142,13 +9080,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9164,13 +9100,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9186,13 +9120,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9208,13 +9140,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
index 28ee1a2a70c4d..47978987bbbb9 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
@@ -5,11 +5,11 @@
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
 ; -O1:    ldxrb w0, [x8]
@@ -20,11 +20,11 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_acquire:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
 ; -O1:    ldaxrb w0, [x8]
@@ -35,11 +35,11 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_release:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
 ; -O1:    ldxrb w0, [x8]
@@ -50,11 +50,11 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w0, [x8]
@@ -65,11 +65,11 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w0, [x8]
@@ -80,10 +80,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
 ; -O1:    ldxrh w0, [x8]
@@ -94,10 +94,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_acquire:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
 ; -O1:    ldaxrh w0, [x8]
@@ -108,10 +108,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_release:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
 ; -O1:    ldxrh w0, [x8]
@@ -122,10 +122,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w0, [x8]
@@ -136,10 +136,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w0, [x8]
@@ -150,10 +150,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_monotonic:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_monotonic:
 ; -O1:    ldxr w0, [x8]
@@ -164,10 +164,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_acquire:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_acquire:
 ; -O1:    ldaxr w0, [x8]
@@ -178,10 +178,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_release:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_release:
 ; -O1:    ldxr w0, [x8]
@@ -192,10 +192,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_acq_rel:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_acq_rel:
 ; -O1:    ldaxr w0, [x8]
@@ -206,10 +206,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_seq_cst:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_seq_cst:
 ; -O1:    ldaxr w0, [x8]
@@ -220,10 +220,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_monotonic:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -234,10 +234,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_acquire:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -248,10 +248,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_release:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -262,10 +262,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_acq_rel:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -276,10 +276,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_seq_cst:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -298,7 +298,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
 ; -O1:    ldxp x8, x1, [x0]
@@ -317,7 +317,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire:
 ; -O1:    ldaxp x8, x1, [x0]
@@ -336,7 +336,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_release:
 ; -O1:    ldxp x8, x1, [x0]
@@ -355,7 +355,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
 ; -O1:    ldaxp x8, x1, [x0]
@@ -374,7 +374,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
 ; -O1:    ldaxp x8, x1, [x0]
@@ -385,11 +385,11 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
 ; -O1:    ldxrb w0, [x8]
@@ -400,11 +400,11 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
 ; -O1:    ldaxrb w0, [x8]
@@ -415,11 +415,11 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_release:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
 ; -O1:    ldxrb w0, [x8]
@@ -430,11 +430,11 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w0, [x8]
@@ -445,11 +445,11 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w0, [x8]
@@ -600,12 +600,12 @@ define dso_local i128 @atomicrmw_xchg_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_add_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_monotonic:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -617,12 +617,12 @@ define dso_local i8 @atomicrmw_add_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_acquire:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -634,12 +634,12 @@ define dso_local i8 @atomicrmw_add_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_release:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -651,12 +651,12 @@ define dso_local i8 @atomicrmw_add_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_acq_rel:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -668,12 +668,12 @@ define dso_local i8 @atomicrmw_add_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_seq_cst:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -685,11 +685,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_monotonic:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -701,11 +701,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_add_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_acquire:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -717,11 +717,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_release:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -733,11 +733,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_acq_rel:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -749,11 +749,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_seq_cst:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -765,11 +765,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_monotonic:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -781,11 +781,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_add_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_acquire:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -797,11 +797,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_release:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -813,11 +813,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_acq_rel:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -829,11 +829,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_seq_cst:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -845,11 +845,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_monotonic:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -861,11 +861,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_add_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_acquire:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -877,11 +877,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_release:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -893,11 +893,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_acq_rel:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -909,11 +909,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_seq_cst:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -926,7 +926,7 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -935,7 +935,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -948,7 +948,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -957,7 +957,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -970,7 +970,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -979,7 +979,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -992,7 +992,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1001,7 +1001,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1014,7 +1014,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1023,7 +1023,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1035,12 +1035,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_monotonic:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1052,12 +1052,12 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_acquire:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1069,12 +1069,12 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_release:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1086,12 +1086,12 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_acq_rel:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1103,12 +1103,12 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_seq_cst:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -1301,7 +1301,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -1315,7 +1315,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -1329,7 +1329,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -1343,7 +1343,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -1357,7 +1357,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1370,12 +1370,12 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_monotonic:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1387,12 +1387,12 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_acquire:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1404,12 +1404,12 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_release:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1421,12 +1421,12 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_acq_rel:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1438,12 +1438,12 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_seq_cst:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -1455,11 +1455,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_monotonic:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -1471,11 +1471,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_acquire:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -1487,11 +1487,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_release:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -1503,11 +1503,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_acq_rel:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -1519,11 +1519,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_seq_cst:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -1535,11 +1535,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_monotonic:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -1551,11 +1551,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_acquire:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -1567,11 +1567,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_release:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -1583,11 +1583,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_acq_rel:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -1599,11 +1599,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_seq_cst:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -1615,11 +1615,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_monotonic:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -1631,11 +1631,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_acquire:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -1647,11 +1647,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_release:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -1663,11 +1663,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_acq_rel:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -1679,11 +1679,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_seq_cst:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -1696,6 +1696,7 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1704,7 +1705,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1717,6 +1718,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1725,7 +1727,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1738,6 +1740,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1746,7 +1749,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1759,6 +1762,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1767,7 +1771,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1780,6 +1784,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1788,7 +1793,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1800,12 +1805,12 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_monotonic:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1817,12 +1822,12 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_acquire:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1834,12 +1839,12 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_release:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1851,12 +1856,12 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_acq_rel:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1868,12 +1873,12 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_seq_cst:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2066,6 +2071,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -2079,6 +2085,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -2092,6 +2099,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -2105,6 +2113,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -2118,6 +2127,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
@@ -2130,12 +2140,12 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_monotonic:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2147,12 +2157,12 @@ define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_acquire:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2164,12 +2174,12 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_release:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2181,12 +2191,12 @@ define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_acq_rel:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2198,12 +2208,12 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_seq_cst:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2215,11 +2225,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_monotonic:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -2231,11 +2241,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_acquire:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -2247,11 +2257,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_release:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -2263,11 +2273,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_acq_rel:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -2279,11 +2289,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_seq_cst:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -2295,11 +2305,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_monotonic:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -2311,11 +2321,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_acquire:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -2327,11 +2337,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_release:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -2343,11 +2353,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_acq_rel:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -2359,11 +2369,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_seq_cst:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -2375,11 +2385,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_monotonic:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -2391,11 +2401,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_acquire:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -2407,11 +2417,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_release:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -2423,11 +2433,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_acq_rel:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -2439,11 +2449,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_seq_cst:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -2465,7 +2475,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -2488,7 +2498,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2511,7 +2521,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -2534,7 +2544,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2557,7 +2567,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2570,12 +2580,12 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_monotonic:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2587,12 +2597,12 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_acquire:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2604,12 +2614,12 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_release:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2621,12 +2631,12 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2638,12 +2648,12 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2910,13 +2920,13 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_monotonic:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2929,13 +2939,13 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_acquire:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2948,13 +2958,13 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_release:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2967,13 +2977,13 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2986,13 +2996,13 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3006,11 +3016,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -3024,11 +3034,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -3042,11 +3052,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -3060,11 +3070,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -3078,11 +3088,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -3096,11 +3106,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -3114,11 +3124,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -3132,11 +3142,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -3150,11 +3160,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -3168,11 +3178,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -3186,11 +3196,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -3204,11 +3214,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -3222,11 +3232,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -3240,11 +3250,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -3258,11 +3268,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -3287,7 +3297,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -3314,7 +3324,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3341,7 +3351,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -3368,7 +3378,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3395,7 +3405,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3410,13 +3420,13 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -3429,13 +3439,13 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_acquire:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -3448,13 +3458,13 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_release:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -3467,13 +3477,13 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -3486,13 +3496,13 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3810,12 +3820,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_or_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_monotonic:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -3827,12 +3837,12 @@ define dso_local i8 @atomicrmw_or_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_acquire:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -3844,12 +3854,12 @@ define dso_local i8 @atomicrmw_or_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_release:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -3861,12 +3871,12 @@ define dso_local i8 @atomicrmw_or_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_acq_rel:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -3878,12 +3888,12 @@ define dso_local i8 @atomicrmw_or_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_seq_cst:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3895,11 +3905,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_monotonic:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -3911,11 +3921,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_acquire:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -3927,11 +3937,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_release:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -3943,11 +3953,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_acq_rel:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -3959,11 +3969,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_seq_cst:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -3975,11 +3985,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_monotonic:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -3991,11 +4001,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_acquire:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -4007,11 +4017,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_release:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -4023,11 +4033,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_acq_rel:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -4039,11 +4049,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_seq_cst:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -4055,11 +4065,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_monotonic:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -4071,11 +4081,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_acquire:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -4087,11 +4097,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_release:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -4103,11 +4113,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_acq_rel:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4119,11 +4129,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_seq_cst:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4145,7 +4155,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4168,7 +4178,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4191,7 +4201,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4214,7 +4224,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4237,7 +4247,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4250,12 +4260,12 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_monotonic:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4267,12 +4277,12 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_acquire:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4284,12 +4294,12 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_release:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4301,12 +4311,12 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_acq_rel:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4318,12 +4328,12 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_seq_cst:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -4590,12 +4600,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_monotonic:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4607,12 +4617,12 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_acquire:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4624,12 +4634,12 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_release:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4641,12 +4651,12 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_acq_rel:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4658,12 +4668,12 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_seq_cst:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -4675,11 +4685,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_monotonic:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -4691,11 +4701,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_acquire:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -4707,11 +4717,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_release:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -4723,11 +4733,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_acq_rel:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -4739,11 +4749,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_seq_cst:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -4755,11 +4765,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_monotonic:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -4771,11 +4781,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_acquire:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -4787,11 +4797,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_release:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -4803,11 +4813,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_acq_rel:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -4819,11 +4829,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_seq_cst:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -4835,11 +4845,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_monotonic:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -4851,11 +4861,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_acquire:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -4867,11 +4877,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_release:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -4883,11 +4893,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_acq_rel:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4899,11 +4909,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_seq_cst:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4925,7 +4935,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4948,7 +4958,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4971,7 +4981,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4994,7 +5004,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5017,7 +5027,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5030,12 +5040,12 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_monotonic:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -5047,12 +5057,12 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_acquire:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -5064,12 +5074,12 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_release:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -5081,12 +5091,12 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_acq_rel:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -5098,12 +5108,12 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_seq_cst:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -5370,14 +5380,14 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5391,14 +5401,14 @@ define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5412,14 +5422,14 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_release:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5433,14 +5443,14 @@ define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5454,14 +5464,14 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -5475,13 +5485,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -5495,13 +5505,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -5515,13 +5525,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_release:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -5535,13 +5545,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -5555,13 +5565,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -5575,12 +5585,12 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -5593,12 +5603,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -5611,12 +5621,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_release:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -5629,12 +5639,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -5647,12 +5657,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -5665,12 +5675,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -5683,12 +5693,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -5701,12 +5711,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_release:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -5719,12 +5729,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -5737,12 +5747,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -5755,13 +5765,11 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5771,7 +5779,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -5785,13 +5793,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5801,7 +5807,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5815,13 +5821,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5831,7 +5835,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -5845,13 +5849,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5861,7 +5863,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5875,13 +5877,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5891,7 +5891,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5905,14 +5905,14 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5926,14 +5926,14 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5947,14 +5947,14 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5968,14 +5968,14 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5989,14 +5989,14 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6011,7 +6011,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6027,7 +6027,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6043,7 +6043,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6059,7 +6059,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6075,7 +6075,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6090,7 +6090,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6104,7 +6104,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6118,7 +6118,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6132,7 +6132,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6146,7 +6146,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6160,7 +6160,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6174,7 +6174,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6188,7 +6188,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6202,7 +6202,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6216,7 +6216,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6230,13 +6230,11 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6252,13 +6250,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6274,13 +6270,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6296,13 +6290,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6318,13 +6310,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6340,14 +6330,14 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -6361,14 +6351,14 @@ define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -6382,14 +6372,14 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -6403,14 +6393,14 @@ define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -6424,14 +6414,14 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6445,13 +6435,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -6465,13 +6455,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -6485,13 +6475,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -6505,13 +6495,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -6525,13 +6515,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -6545,12 +6535,12 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -6563,12 +6553,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -6581,12 +6571,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -6599,12 +6589,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -6617,12 +6607,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -6635,12 +6625,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -6653,12 +6643,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -6671,12 +6661,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -6689,12 +6679,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -6707,12 +6697,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -6725,13 +6715,11 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6741,7 +6729,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -6755,13 +6743,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6771,7 +6757,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -6785,13 +6771,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6801,7 +6785,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -6815,13 +6799,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6831,7 +6813,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -6845,13 +6827,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6861,7 +6841,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -6875,14 +6855,14 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -6896,14 +6876,14 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -6917,14 +6897,14 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -6938,14 +6918,14 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -6959,14 +6939,14 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6981,7 +6961,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6997,7 +6977,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7013,7 +6993,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7029,7 +7009,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7045,7 +7025,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7060,7 +7040,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7074,7 +7054,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7088,7 +7068,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7102,7 +7082,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7116,7 +7096,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7130,7 +7110,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7144,7 +7124,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7158,7 +7138,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7172,7 +7152,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7186,7 +7166,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7200,13 +7180,11 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7222,13 +7200,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7244,13 +7220,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7266,13 +7240,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7288,13 +7260,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7310,14 +7280,14 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7331,14 +7301,14 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7352,14 +7322,14 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7373,14 +7343,14 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7394,14 +7364,14 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7415,12 +7385,12 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -7434,12 +7404,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -7453,12 +7423,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -7472,12 +7442,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -7491,12 +7461,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -7510,12 +7480,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -7528,12 +7498,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -7546,12 +7516,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -7564,12 +7534,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -7582,12 +7552,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -7600,12 +7570,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -7618,12 +7588,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -7636,12 +7606,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -7654,12 +7624,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -7672,12 +7642,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -7690,13 +7660,11 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7706,7 +7674,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -7720,13 +7688,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7736,7 +7702,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -7750,13 +7716,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7766,7 +7730,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -7780,13 +7744,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7796,7 +7758,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -7810,13 +7772,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7826,7 +7786,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -7840,14 +7800,14 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7861,14 +7821,14 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7882,14 +7842,14 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7903,14 +7863,14 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7924,14 +7884,14 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7945,7 +7905,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7960,7 +7920,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7975,7 +7935,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7990,7 +7950,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8005,7 +7965,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8020,7 +7980,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8034,7 +7994,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8048,7 +8008,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8062,7 +8022,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8076,7 +8036,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8090,7 +8050,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8104,7 +8064,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8118,7 +8078,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8132,7 +8092,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8146,7 +8106,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8160,13 +8120,11 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8182,13 +8140,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8204,13 +8160,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8226,13 +8180,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8248,13 +8200,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8270,14 +8220,14 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -8291,14 +8241,14 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -8312,14 +8262,14 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -8333,14 +8283,14 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -8354,14 +8304,14 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -8375,12 +8325,12 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -8394,12 +8344,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -8413,12 +8363,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -8432,12 +8382,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -8451,12 +8401,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -8470,12 +8420,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -8488,12 +8438,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -8506,12 +8456,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -8524,12 +8474,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -8542,12 +8492,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -8560,12 +8510,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -8578,12 +8528,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -8596,12 +8546,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -8614,12 +8564,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -8632,12 +8582,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -8650,13 +8600,11 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8666,7 +8614,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -8680,13 +8628,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8696,7 +8642,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -8710,13 +8656,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8726,7 +8670,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -8740,13 +8684,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8756,7 +8698,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -8770,13 +8712,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8786,7 +8726,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -8800,14 +8740,14 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -8821,14 +8761,14 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -8842,14 +8782,14 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -8863,14 +8803,14 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -8884,14 +8824,14 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -8905,7 +8845,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8920,7 +8860,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8935,7 +8875,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8950,7 +8890,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8965,7 +8905,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8980,7 +8920,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8994,7 +8934,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9008,7 +8948,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9022,7 +8962,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9036,7 +8976,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9050,7 +8990,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9064,7 +9004,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9078,7 +9018,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9092,7 +9032,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9106,7 +9046,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9120,13 +9060,11 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9142,13 +9080,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9164,13 +9100,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9186,13 +9120,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9208,13 +9140,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
index 51933261313ea..01a07e9e9232a 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
@@ -149,7 +149,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -166,7 +166,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -183,7 +183,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -200,7 +200,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -217,7 +217,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -546,12 +546,12 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0:    adds x2, x9, x11
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    casp x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -566,12 +566,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0:    adds x2, x9, x11
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    caspa x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -586,12 +586,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0:    adds x2, x9, x11
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    caspl x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -606,12 +606,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0:    adds x2, x9, x11
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -626,12 +626,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0:    adds x2, x9, x11
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -861,7 +861,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -875,7 +875,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -889,7 +889,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -903,7 +903,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -917,7 +917,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1071,11 +1071,12 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O0:    subs x2, x9, x11
+; -O0:    cmp wzr, w11
 ; -O0:    casp x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -1090,11 +1091,12 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O0:    subs x2, x9, x11
+; -O0:    cmp wzr, w11
 ; -O0:    caspa x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -1109,11 +1111,12 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O0:    subs x2, x9, x11
+; -O0:    cmp wzr, w11
 ; -O0:    caspl x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -1128,11 +1131,12 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0:    subs x2, x9, x11
+; -O0:    cmp wzr, w11
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -1147,11 +1151,12 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0:    subs x2, x9, x11
+; -O0:    cmp wzr, w11
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -1381,6 +1386,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -1394,6 +1400,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -1407,6 +1414,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -1420,6 +1428,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -1433,6 +1442,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
@@ -1611,7 +1621,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -1632,7 +1642,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -1653,7 +1663,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -1674,7 +1684,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -1695,7 +1705,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -2009,7 +2019,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    casb w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O1:    and w10, w8, w1
@@ -2026,7 +2036,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    casab w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O1:    and w10, w8, w1
@@ -2043,7 +2053,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    caslb w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O1:    and w10, w8, w1
@@ -2060,7 +2070,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    casalb w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O1:    and w10, w8, w1
@@ -2077,7 +2087,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    casalb w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O1:    and w10, w8, w1
@@ -2093,7 +2103,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value)
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    cash w9, w10, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O1:    and w10, w8, w1
@@ -2109,7 +2119,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    casah w9, w10, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O1:    and w10, w8, w1
@@ -2125,7 +2135,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    caslh w9, w10, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O1:    and w10, w8, w1
@@ -2141,7 +2151,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    casalh w9, w10, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O1:    and w10, w8, w1
@@ -2157,7 +2167,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    casalh w9, w10, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O1:    and w10, w8, w1
@@ -2173,7 +2183,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value)
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    cas w9, w10, [x11]
-; -O0:    subs w8, w9, w8
+; -O0:    cmp w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O1:    and w10, w8, w1
@@ -2189,7 +2199,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    casa w9, w10, [x11]
-; -O0:    subs w8, w9, w8
+; -O0:    cmp w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O1:    and w10, w8, w1
@@ -2205,7 +2215,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    casl w9, w10, [x11]
-; -O0:    subs w8, w9, w8
+; -O0:    cmp w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O1:    and w10, w8, w1
@@ -2221,7 +2231,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    casal w9, w10, [x11]
-; -O0:    subs w8, w9, w8
+; -O0:    cmp w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O1:    and w10, w8, w1
@@ -2237,7 +2247,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0:    and w9, w8, w9
 ; -O0:    mvn w10, w9
 ; -O0:    casal w9, w10, [x11]
-; -O0:    subs w8, w9, w8
+; -O0:    cmp w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O1:    and w10, w8, w1
@@ -2253,7 +2263,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value)
 ; -O0:    and x9, x8, x9
 ; -O0:    mvn x10, x9
 ; -O0:    cas x9, x10, [x11]
-; -O0:    subs x8, x9, x8
+; -O0:    cmp x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O1:    and x10, x8, x1
@@ -2269,7 +2279,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0:    and x9, x8, x9
 ; -O0:    mvn x10, x9
 ; -O0:    casa x9, x10, [x11]
-; -O0:    subs x8, x9, x8
+; -O0:    cmp x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O1:    and x10, x8, x1
@@ -2285,7 +2295,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0:    and x9, x8, x9
 ; -O0:    mvn x10, x9
 ; -O0:    casl x9, x10, [x11]
-; -O0:    subs x8, x9, x8
+; -O0:    cmp x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O1:    and x10, x8, x1
@@ -2301,7 +2311,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0:    and x9, x8, x9
 ; -O0:    mvn x10, x9
 ; -O0:    casal x9, x10, [x11]
-; -O0:    subs x8, x9, x8
+; -O0:    cmp x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O1:    and x10, x8, x1
@@ -2317,7 +2327,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0:    and x9, x8, x9
 ; -O0:    mvn x10, x9
 ; -O0:    casal x9, x10, [x11]
-; -O0:    subs x8, x9, x8
+; -O0:    cmp x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O1:    and x10, x8, x1
@@ -2338,7 +2348,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -2363,7 +2373,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -2388,7 +2398,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -2413,7 +2423,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -2438,7 +2448,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -2459,7 +2469,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0:    mvn w8, w8
 ; -O0:    casb w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O1:    and w10, w8, w1
@@ -2476,7 +2486,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    casab w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O1:    and w10, w8, w1
@@ -2493,7 +2503,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    caslb w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O1:    and w10, w8, w1
@@ -2510,7 +2520,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    casalb w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O1:    and w10, w8, w1
@@ -2527,7 +2537,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    mvn w8, w8
 ; -O0:    casalb w9, w8, [x11]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O1:    and w10, w8, w1
@@ -2991,7 +3001,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -3012,7 +3022,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -3033,7 +3043,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -3054,7 +3064,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -3075,7 +3085,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -3526,7 +3536,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -3547,7 +3557,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -3568,7 +3578,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -3589,7 +3599,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -3610,7 +3620,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -4055,19 +4065,17 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    casp x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -4083,19 +4091,17 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspa x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -4111,19 +4117,17 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspl x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -4139,19 +4143,17 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -4167,19 +4169,17 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -4231,7 +4231,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4247,7 +4247,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4263,7 +4263,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4279,7 +4279,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4295,7 +4295,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4310,7 +4310,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4324,7 +4324,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4338,7 +4338,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4352,7 +4352,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4366,7 +4366,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4380,7 +4380,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4394,7 +4394,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4408,7 +4408,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4422,7 +4422,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4436,7 +4436,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4450,13 +4450,11 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4472,13 +4470,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4494,13 +4490,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4516,13 +4510,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4538,13 +4530,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4700,19 +4690,17 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    casp x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -4728,19 +4716,17 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspa x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -4756,19 +4742,17 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspl x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -4784,19 +4768,17 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -4812,19 +4794,17 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -4876,7 +4856,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4892,7 +4872,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4908,7 +4888,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4924,7 +4904,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4940,7 +4920,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4955,7 +4935,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4969,7 +4949,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4983,7 +4963,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4997,7 +4977,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5011,7 +4991,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5025,7 +5005,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5039,7 +5019,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5053,7 +5033,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5067,7 +5047,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5081,7 +5061,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5095,13 +5075,11 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5117,13 +5095,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5139,13 +5115,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5161,13 +5135,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5183,13 +5155,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5345,19 +5315,17 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    casp x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -5373,19 +5341,17 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspa x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -5401,19 +5367,17 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspl x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -5429,19 +5393,17 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -5457,19 +5419,17 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -5520,7 +5480,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5535,7 +5495,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5550,7 +5510,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5565,7 +5525,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5580,7 +5540,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5595,7 +5555,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5609,7 +5569,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5623,7 +5583,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5637,7 +5597,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5651,7 +5611,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5665,7 +5625,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5679,7 +5639,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5693,7 +5653,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5707,7 +5667,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5721,7 +5681,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5735,13 +5695,11 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5757,13 +5715,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5779,13 +5735,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5801,13 +5755,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5823,13 +5775,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5985,19 +5935,17 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    casp x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -6013,19 +5961,17 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspa x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -6041,19 +5987,17 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspl x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -6069,19 +6013,17 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -6097,19 +6039,17 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
-; -O0:    subs x9, x9, x12
-; -O0:    subs x11, x11, x10
-; -O0:    subs x13, x13, x10
+; -O0:    cmp x9, x12
+; -O0:    cmp x11, x10
+; -O0:    cmp x13, x10
 ; -O0:    csel w11, w9, w11, eq
-; -O0:    ands w13, w11, #0x1
 ; -O0:    csel x2, x9, x12, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x9, x9, x10, ne
 ; -O0:    caspal x0, x1, x2, x3, [x8]
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -6160,7 +6100,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6175,7 +6115,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6190,7 +6130,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6205,7 +6145,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6220,7 +6160,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6235,7 +6175,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6249,7 +6189,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6263,7 +6203,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6277,7 +6217,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6291,7 +6231,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6305,7 +6245,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6319,7 +6259,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6333,7 +6273,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6347,7 +6287,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6361,7 +6301,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6375,13 +6315,11 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6397,13 +6335,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6419,13 +6355,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6441,13 +6375,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6463,13 +6395,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
index 69220a6cf8132..130493cab645d 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
@@ -5,11 +5,11 @@
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
 ; -O1:    ldxrb w0, [x8]
@@ -20,11 +20,11 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_acquire:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
 ; -O1:    ldaxrb w0, [x8]
@@ -35,11 +35,11 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_release:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
 ; -O1:    ldxrb w0, [x8]
@@ -50,11 +50,11 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w0, [x8]
@@ -65,11 +65,11 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w0, [x8]
@@ -80,10 +80,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
 ; -O1:    ldxrh w0, [x8]
@@ -94,10 +94,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_acquire:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
 ; -O1:    ldaxrh w0, [x8]
@@ -108,10 +108,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_release:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
 ; -O1:    ldxrh w0, [x8]
@@ -122,10 +122,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w0, [x8]
@@ -136,10 +136,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
-; -O0:    ldaxrh w9, [x11]
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w0, [x8]
@@ -150,10 +150,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_monotonic:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_monotonic:
 ; -O1:    ldxr w0, [x8]
@@ -164,10 +164,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_acquire:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_acquire:
 ; -O1:    ldaxr w0, [x8]
@@ -178,10 +178,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_release:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_release:
 ; -O1:    ldxr w0, [x8]
@@ -192,10 +192,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_acq_rel:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_acq_rel:
 ; -O1:    ldaxr w0, [x8]
@@ -206,10 +206,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_seq_cst:
-; -O0:    ldaxr w9, [x11]
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_seq_cst:
 ; -O1:    ldaxr w0, [x8]
@@ -220,10 +220,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_monotonic:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -234,10 +234,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_acquire:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -248,10 +248,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_release:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -262,10 +262,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_acq_rel:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -276,10 +276,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_seq_cst:
-; -O0:    ldaxr x9, [x11]
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -298,7 +298,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
 ; -O1:    ldxp x8, x1, [x0]
@@ -317,7 +317,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire:
 ; -O1:    ldaxp x8, x1, [x0]
@@ -336,7 +336,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_release:
 ; -O1:    ldxp x8, x1, [x0]
@@ -355,7 +355,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
 ; -O1:    ldaxp x8, x1, [x0]
@@ -374,7 +374,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
 ; -O1:    ldaxp x8, x1, [x0]
@@ -385,11 +385,11 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
 ; -O1:    ldxrb w0, [x8]
@@ -400,11 +400,11 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
 ; -O1:    ldaxrb w0, [x8]
@@ -415,11 +415,11 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_release:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
 ; -O1:    ldxrb w0, [x8]
@@ -430,11 +430,11 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w0, [x8]
@@ -445,11 +445,11 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
-; -O0:    ldaxrb w9, [x11]
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w0, [x8]
@@ -600,12 +600,12 @@ define dso_local i128 @atomicrmw_xchg_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_add_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_monotonic:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -617,12 +617,12 @@ define dso_local i8 @atomicrmw_add_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_acquire:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -634,12 +634,12 @@ define dso_local i8 @atomicrmw_add_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_release:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -651,12 +651,12 @@ define dso_local i8 @atomicrmw_add_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_acq_rel:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -668,12 +668,12 @@ define dso_local i8 @atomicrmw_add_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_seq_cst:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -685,11 +685,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_monotonic:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -701,11 +701,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_add_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_acquire:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -717,11 +717,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_release:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -733,11 +733,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_acq_rel:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -749,11 +749,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_seq_cst:
-; -O0:    add w12, w9, w8, uxth
-; -O0:    ldaxrh w9, [x11]
+; -O0:    add w11, w9, w8, uxth
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -765,11 +765,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_monotonic:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -781,11 +781,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_add_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_acquire:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -797,11 +797,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_release:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -813,11 +813,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_acq_rel:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -829,11 +829,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_seq_cst:
-; -O0:    add w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    add w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -845,11 +845,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_monotonic:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -861,11 +861,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_add_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_acquire:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -877,11 +877,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_release:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -893,11 +893,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_acq_rel:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -909,11 +909,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_seq_cst:
-; -O0:    add x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    add x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -926,7 +926,7 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -935,7 +935,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -948,7 +948,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -957,7 +957,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -970,7 +970,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -979,7 +979,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -992,7 +992,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1001,7 +1001,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1014,7 +1014,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0:    adds x14, x8, x10
-; -O0:    subs w10, w10, #1
+; -O0:    cmp w10, #1
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1023,7 +1023,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1035,12 +1035,12 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_monotonic:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1052,12 +1052,12 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_acquire:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1069,12 +1069,12 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_release:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1086,12 +1086,12 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_acq_rel:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1103,12 +1103,12 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_seq_cst:
-; -O0:    add w12, w8, w10, uxth
-; -O0:    ldaxrb w9, [x11]
+; -O0:    add w11, w9, w10, uxth
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -1301,7 +1301,7 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -1315,7 +1315,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -1329,7 +1329,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -1343,7 +1343,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -1357,7 +1357,7 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
 ; -O0:    adds x9, x8, x9
-; -O0:    subs w11, w11, #1
+; -O0:    cmp w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1370,12 +1370,12 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_monotonic:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1387,12 +1387,12 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_acquire:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1404,12 +1404,12 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_release:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1421,12 +1421,12 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_acq_rel:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1438,12 +1438,12 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_seq_cst:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -1455,11 +1455,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_monotonic:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -1471,11 +1471,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_acquire:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -1487,11 +1487,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_release:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -1503,11 +1503,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_acq_rel:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -1519,11 +1519,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_seq_cst:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -1535,11 +1535,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_monotonic:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -1551,11 +1551,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_acquire:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -1567,11 +1567,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_release:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -1583,11 +1583,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_acq_rel:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -1599,11 +1599,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_seq_cst:
-; -O0:    subs w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    subs w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -1615,11 +1615,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_monotonic:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -1631,11 +1631,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_acquire:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -1647,11 +1647,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_release:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -1663,11 +1663,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_acq_rel:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -1679,11 +1679,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_seq_cst:
-; -O0:    subs x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    subs x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -1696,6 +1696,7 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1704,7 +1705,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1717,6 +1718,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1725,7 +1727,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1738,6 +1740,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1746,7 +1749,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1759,6 +1762,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1767,7 +1771,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1780,6 +1784,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0:    subs x14, x8, x10
+; -O0:    cmp wzr, w10
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
 ; -O0:    cmp x9, x13
@@ -1788,7 +1793,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1800,12 +1805,12 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_monotonic:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1817,12 +1822,12 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_acquire:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1834,12 +1839,12 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_release:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1851,12 +1856,12 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_acq_rel:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1868,12 +1873,12 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_seq_cst:
-; -O0:    subs w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    subs w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2066,6 +2071,7 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -2079,6 +2085,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -2092,6 +2099,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -2105,6 +2113,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -2118,6 +2127,7 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O0:    subs x9, x8, x9
+; -O0:    cmp wzr, w11
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
@@ -2130,12 +2140,12 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_monotonic:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2147,12 +2157,12 @@ define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_acquire:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2164,12 +2174,12 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_release:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2181,12 +2191,12 @@ define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_acq_rel:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2198,12 +2208,12 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_seq_cst:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2215,11 +2225,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_monotonic:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -2231,11 +2241,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_acquire:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -2247,11 +2257,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_release:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -2263,11 +2273,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_acq_rel:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -2279,11 +2289,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_seq_cst:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -2295,11 +2305,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_monotonic:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -2311,11 +2321,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_acquire:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -2327,11 +2337,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_release:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -2343,11 +2353,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_acq_rel:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -2359,11 +2369,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_seq_cst:
-; -O0:    and w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    and w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -2375,11 +2385,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_monotonic:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -2391,11 +2401,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_acquire:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -2407,11 +2417,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_release:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -2423,11 +2433,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_acq_rel:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -2439,11 +2449,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_seq_cst:
-; -O0:    and x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    and x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -2465,7 +2475,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -2488,7 +2498,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2511,7 +2521,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -2534,7 +2544,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2557,7 +2567,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2570,12 +2580,12 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_monotonic:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2587,12 +2597,12 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_acquire:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2604,12 +2614,12 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_release:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2621,12 +2631,12 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2638,12 +2648,12 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
-; -O0:    and w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2910,13 +2920,13 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_monotonic:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2929,13 +2939,13 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_acquire:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2948,13 +2958,13 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_release:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2967,13 +2977,13 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2986,13 +2996,13 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3006,11 +3016,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -3024,11 +3034,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -3042,11 +3052,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -3060,11 +3070,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -3078,11 +3088,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -3096,11 +3106,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -3114,11 +3124,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -3132,11 +3142,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -3150,11 +3160,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -3168,11 +3178,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O0:    and w9, w8, w9
-; -O0:    mvn w12, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    mvn w11, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -3186,11 +3196,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -3204,11 +3214,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -3222,11 +3232,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -3240,11 +3250,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -3258,11 +3268,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O0:    and x9, x8, x9
-; -O0:    mvn x12, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    mvn x11, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -3287,7 +3297,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -3314,7 +3324,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3341,7 +3351,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -3368,7 +3378,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3395,7 +3405,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -3410,13 +3420,13 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -3429,13 +3439,13 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_acquire:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -3448,13 +3458,13 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_release:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -3467,13 +3477,13 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -3486,13 +3496,13 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_nand_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
-; -O0:    and w8, w10, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w9, w10, w9
+; -O0:    mvn w11, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3810,12 +3820,12 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_or_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_monotonic:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -3827,12 +3837,12 @@ define dso_local i8 @atomicrmw_or_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_acquire:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -3844,12 +3854,12 @@ define dso_local i8 @atomicrmw_or_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_release:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -3861,12 +3871,12 @@ define dso_local i8 @atomicrmw_or_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_acq_rel:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -3878,12 +3888,12 @@ define dso_local i8 @atomicrmw_or_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_seq_cst:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3895,11 +3905,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_monotonic:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -3911,11 +3921,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_acquire:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -3927,11 +3937,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_release:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -3943,11 +3953,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_acq_rel:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -3959,11 +3969,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_seq_cst:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -3975,11 +3985,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_monotonic:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -3991,11 +4001,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_acquire:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -4007,11 +4017,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_release:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -4023,11 +4033,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_acq_rel:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -4039,11 +4049,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_seq_cst:
-; -O0:    orr w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    orr w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -4055,11 +4065,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_monotonic:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -4071,11 +4081,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_acquire:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -4087,11 +4097,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_release:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -4103,11 +4113,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_acq_rel:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4119,11 +4129,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_seq_cst:
-; -O0:    orr x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    orr x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4145,7 +4155,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4168,7 +4178,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4191,7 +4201,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4214,7 +4224,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4237,7 +4247,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4250,12 +4260,12 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_monotonic:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4267,12 +4277,12 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_acquire:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4284,12 +4294,12 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_release:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4301,12 +4311,12 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_acq_rel:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4318,12 +4328,12 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_seq_cst:
-; -O0:    orr w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    orr w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -4590,12 +4600,12 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_monotonic:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4607,12 +4617,12 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_acquire:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4624,12 +4634,12 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_release:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4641,12 +4651,12 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_acq_rel:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4658,12 +4668,12 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_seq_cst:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -4675,11 +4685,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_monotonic:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -4691,11 +4701,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_acquire:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -4707,11 +4717,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_release:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -4723,11 +4733,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_acq_rel:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -4739,11 +4749,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_seq_cst:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxrh w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -4755,11 +4765,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_monotonic:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -4771,11 +4781,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_acquire:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -4787,11 +4797,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_release:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -4803,11 +4813,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_acq_rel:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -4819,11 +4829,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_seq_cst:
-; -O0:    eor w12, w8, w9
-; -O0:    ldaxr w9, [x11]
+; -O0:    eor w11, w8, w9
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -4835,11 +4845,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_monotonic:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -4851,11 +4861,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_acquire:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -4867,11 +4877,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_release:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -4883,11 +4893,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_acq_rel:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4899,11 +4909,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_seq_cst:
-; -O0:    eor x12, x8, x9
-; -O0:    ldaxr x9, [x11]
+; -O0:    eor x11, x8, x9
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4925,7 +4935,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4948,7 +4958,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -4971,7 +4981,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -4994,7 +5004,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5017,7 +5027,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5030,12 +5040,12 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_monotonic:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -5047,12 +5057,12 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_acquire:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -5064,12 +5074,12 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_release:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -5081,12 +5091,12 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_acq_rel:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -5098,12 +5108,12 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_seq_cst:
-; -O0:    eor w12, w10, w8
-; -O0:    ldaxrb w9, [x11]
+; -O0:    eor w11, w10, w9
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -5370,14 +5380,14 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5391,14 +5401,14 @@ define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5412,14 +5422,14 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_release:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5433,14 +5443,14 @@ define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5454,14 +5464,14 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -5475,13 +5485,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -5495,13 +5505,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -5515,13 +5525,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_release:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -5535,13 +5545,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -5555,13 +5565,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -5575,12 +5585,12 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -5593,12 +5603,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -5611,12 +5621,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_release:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -5629,12 +5639,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -5647,12 +5657,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, gt
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, gt
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -5665,12 +5675,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -5683,12 +5693,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -5701,12 +5711,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_release:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -5719,12 +5729,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -5737,12 +5747,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, gt
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, gt
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -5755,13 +5765,11 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5771,7 +5779,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -5785,13 +5793,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5801,7 +5807,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5815,13 +5821,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5831,7 +5835,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -5845,13 +5849,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5861,7 +5863,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5875,13 +5877,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -5891,7 +5891,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -5905,14 +5905,14 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5926,14 +5926,14 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5947,14 +5947,14 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5968,14 +5968,14 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5989,14 +5989,14 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, gt
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, gt
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6011,7 +6011,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6027,7 +6027,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6043,7 +6043,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6059,7 +6059,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6075,7 +6075,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6090,7 +6090,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6104,7 +6104,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6118,7 +6118,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6132,7 +6132,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6146,7 +6146,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6160,7 +6160,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6174,7 +6174,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6188,7 +6188,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6202,7 +6202,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6216,7 +6216,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6230,13 +6230,11 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6252,13 +6250,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6274,13 +6270,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6296,13 +6290,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6318,13 +6310,11 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6340,14 +6330,14 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -6361,14 +6351,14 @@ define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -6382,14 +6372,14 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -6403,14 +6393,14 @@ define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -6424,14 +6414,14 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6445,13 +6435,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -6465,13 +6455,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -6485,13 +6475,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -6505,13 +6495,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -6525,13 +6515,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
-; -O0:    sxth w10, w8
-; -O0:    subs w10, w10, w9, sxth
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxrh w9, [x11]
+; -O0:    sxth w11, w8
+; -O0:    cmp w11, w9, sxth
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -6545,12 +6535,12 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -6563,12 +6553,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -6581,12 +6571,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -6599,12 +6589,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -6617,12 +6607,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, le
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, le
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -6635,12 +6625,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -6653,12 +6643,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -6671,12 +6661,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -6689,12 +6679,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -6707,12 +6697,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, le
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, le
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -6725,13 +6715,11 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6741,7 +6729,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -6755,13 +6743,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6771,7 +6757,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -6785,13 +6771,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6801,7 +6785,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -6815,13 +6799,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6831,7 +6813,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -6845,13 +6827,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -6861,7 +6841,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -6875,14 +6855,14 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -6896,14 +6876,14 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -6917,14 +6897,14 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -6938,14 +6918,14 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -6959,14 +6939,14 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
-; -O0:    sxtb w9, w10
-; -O0:    subs w9, w9, w8, sxtb
-; -O0:    csel w12, w10, w8, le
-; -O0:    ldaxrb w9, [x11]
+; -O0:    sxtb w11, w10
+; -O0:    cmp w11, w9, sxtb
+; -O0:    csel w11, w10, w9, le
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6981,7 +6961,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6997,7 +6977,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7013,7 +6993,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7029,7 +7009,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7045,7 +7025,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7060,7 +7040,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7074,7 +7054,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7088,7 +7068,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7102,7 +7082,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7116,7 +7096,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7130,7 +7110,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7144,7 +7124,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7158,7 +7138,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7172,7 +7152,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7186,7 +7166,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7200,13 +7180,11 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7222,13 +7200,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7244,13 +7220,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7266,13 +7240,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7288,13 +7260,11 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7310,14 +7280,14 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7331,14 +7301,14 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7352,14 +7322,14 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7373,14 +7343,14 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7394,14 +7364,14 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7415,12 +7385,12 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -7434,12 +7404,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -7453,12 +7423,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -7472,12 +7442,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -7491,12 +7461,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -7510,12 +7480,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -7528,12 +7498,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -7546,12 +7516,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -7564,12 +7534,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -7582,12 +7552,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, hi
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, hi
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -7600,12 +7570,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -7618,12 +7588,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -7636,12 +7606,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -7654,12 +7624,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -7672,12 +7642,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, hi
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, hi
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -7690,13 +7660,11 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7706,7 +7674,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -7720,13 +7688,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7736,7 +7702,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -7750,13 +7716,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7766,7 +7730,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -7780,13 +7744,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7796,7 +7758,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -7810,13 +7772,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -7826,7 +7786,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -7840,14 +7800,14 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7861,14 +7821,14 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7882,14 +7842,14 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7903,14 +7863,14 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7924,14 +7884,14 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, hi
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, hi
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7945,7 +7905,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7960,7 +7920,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7975,7 +7935,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7990,7 +7950,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8005,7 +7965,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8020,7 +7980,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8034,7 +7994,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8048,7 +8008,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8062,7 +8022,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8076,7 +8036,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8090,7 +8050,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8104,7 +8064,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8118,7 +8078,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8132,7 +8092,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8146,7 +8106,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8160,13 +8120,11 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8182,13 +8140,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8204,13 +8160,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8226,13 +8180,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8248,13 +8200,11 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8270,14 +8220,14 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -8291,14 +8241,14 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -8312,14 +8262,14 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -8333,14 +8283,14 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -8354,14 +8304,14 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -8375,12 +8325,12 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -8394,12 +8344,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -8413,12 +8363,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -8432,12 +8382,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -8451,12 +8401,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
-; -O0:    subs w10, w10, w9, uxth
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxrh w9, [x11]
+; -O0:    cmp w11, w9, uxth
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxrh w9, [x10]
 ; -O0:    cmp w9, w8, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w8, w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -8470,12 +8420,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -8488,12 +8438,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -8506,12 +8456,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -8524,12 +8474,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -8542,12 +8492,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
-; -O0:    subs w10, w8, w9
-; -O0:    csel w12, w8, w9, ls
-; -O0:    ldaxr w9, [x11]
+; -O0:    cmp w8, w9
+; -O0:    csel w11, w8, w9, ls
+; -O0:    ldaxr w9, [x10]
+; -O0:    cmp w9, w8
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w9, w8
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w8, w9, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -8560,12 +8510,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -8578,12 +8528,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -8596,12 +8546,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -8614,12 +8564,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -8632,12 +8582,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
-; -O0:    subs x10, x8, x9
-; -O0:    csel x12, x8, x9, ls
-; -O0:    ldaxr x9, [x11]
+; -O0:    cmp x8, x9
+; -O0:    csel x11, x8, x9, ls
+; -O0:    ldaxr x9, [x10]
+; -O0:    cmp x9, x8
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x9, x8
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x8, x9, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -8650,13 +8600,11 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8666,7 +8614,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -8680,13 +8628,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8696,7 +8642,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -8710,13 +8656,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8726,7 +8670,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -8740,13 +8684,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8756,7 +8698,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -8770,13 +8712,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x12
-; -O0:    subs x10, x10, x9
-; -O0:    subs x13, x13, x9
+; -O0:    cmp x8, x12
+; -O0:    cmp x10, x9
+; -O0:    cmp x13, x9
 ; -O0:    csel w10, w8, w10, eq
-; -O0:    ands w13, w10, #0x1
 ; -O0:    csel x14, x8, x12, ne
-; -O0:    ands w10, w10, #0x1
 ; -O0:    csel x15, x8, x9, ne
 ; -O0:    ldaxp x10, x9, [x11]
 ; -O0:    cmp x10, x12
@@ -8786,7 +8726,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    eor x8, x10, x8
 ; -O0:    eor x11, x9, x11
 ; -O0:    orr x8, x8, x11
-; -O0:    subs x8, x8, #0
+; -O0:    cmp x8, #0
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -8800,14 +8740,14 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -8821,14 +8761,14 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -8842,14 +8782,14 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -8863,14 +8803,14 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -8884,14 +8824,14 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
-; -O0:    and w9, w10, #0xff
-; -O0:    subs w9, w9, w8, uxtb
-; -O0:    csel w12, w10, w8, ls
-; -O0:    ldaxrb w9, [x11]
+; -O0:    and w11, w10, #0xff
+; -O0:    cmp w11, w9, uxtb
+; -O0:    csel w11, w10, w9, ls
+; -O0:    ldaxrb w9, [x8]
 ; -O0:    cmp w9, w10, uxtb
-; -O0:    stlxrb w8, w12, [x11]
+; -O0:    stlxrb wzr, w11, [x8]
 ; -O0:    and w8, w9, #0xff
-; -O0:    subs w8, w8, w10, uxtb
+; -O0:    cmp w8, w10, uxtb
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -8905,7 +8845,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8920,7 +8860,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8935,7 +8875,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8950,7 +8890,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8965,7 +8905,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8980,7 +8920,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8994,7 +8934,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9008,7 +8948,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9022,7 +8962,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9036,7 +8976,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9050,7 +8990,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9064,7 +9004,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9078,7 +9018,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9092,7 +9032,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9106,7 +9046,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9120,13 +9060,11 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9142,13 +9080,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9164,13 +9100,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9186,13 +9120,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -9208,13 +9140,11 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x9
-; -O0:    subs x11, x11, x10
-; -O0:    subs x12, x12, x10
+; -O0:    cmp x8, x9
+; -O0:    cmp x11, x10
+; -O0:    cmp x12, x10
 ; -O0:    csel w11, w8, w11, eq
-; -O0:    ands w12, w11, #0x1
 ; -O0:    csel x9, x8, x9, ne
-; -O0:    ands w11, w11, #0x1
 ; -O0:    csel x8, x8, x10, ne
 ; -O0:    bl __atomic_compare_exchange
 ;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a_fp.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a_fp.ll
index 4a1b7642eff4c..9042cf4c80c2b 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a_fp.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a_fp.ll
@@ -5,10 +5,10 @@
 
 define dso_local half @atomicrmw_fadd_half_aligned_monotonic(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fadd_half_aligned_monotonic:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_half_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -19,10 +19,10 @@ define dso_local half @atomicrmw_fadd_half_aligned_monotonic(ptr %ptr, half %val
 
 define dso_local half @atomicrmw_fadd_half_aligned_acquire(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fadd_half_aligned_acquire:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_half_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -33,10 +33,10 @@ define dso_local half @atomicrmw_fadd_half_aligned_acquire(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fadd_half_aligned_release(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fadd_half_aligned_release:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_half_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -47,10 +47,10 @@ define dso_local half @atomicrmw_fadd_half_aligned_release(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fadd_half_aligned_acq_rel(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fadd_half_aligned_acq_rel:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_half_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -61,10 +61,10 @@ define dso_local half @atomicrmw_fadd_half_aligned_acq_rel(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fadd_half_aligned_seq_cst(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fadd_half_aligned_seq_cst:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_half_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -77,10 +77,10 @@ define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_monotonic(ptr %ptr, bfloa
 ; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_monotonic:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -95,10 +95,10 @@ define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_acquire(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_acquire:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -113,10 +113,10 @@ define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_release(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_release:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -131,10 +131,10 @@ define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_acq_rel(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_acq_rel:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -149,10 +149,10 @@ define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_seq_cst:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -165,10 +165,10 @@ define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 
 define dso_local float @atomicrmw_fadd_float_aligned_monotonic(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fadd_float_aligned_monotonic:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fadd_float_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -179,10 +179,10 @@ define dso_local float @atomicrmw_fadd_float_aligned_monotonic(ptr %ptr, float %
 
 define dso_local float @atomicrmw_fadd_float_aligned_acquire(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fadd_float_aligned_acquire:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fadd_float_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -193,10 +193,10 @@ define dso_local float @atomicrmw_fadd_float_aligned_acquire(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fadd_float_aligned_release(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fadd_float_aligned_release:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fadd_float_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -207,10 +207,10 @@ define dso_local float @atomicrmw_fadd_float_aligned_release(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fadd_float_aligned_acq_rel(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fadd_float_aligned_acq_rel:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fadd_float_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -221,10 +221,10 @@ define dso_local float @atomicrmw_fadd_float_aligned_acq_rel(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fadd_float_aligned_seq_cst(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fadd_float_aligned_seq_cst:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fadd_float_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -235,10 +235,10 @@ define dso_local float @atomicrmw_fadd_float_aligned_seq_cst(ptr %ptr, float %va
 
 define dso_local double @atomicrmw_fadd_double_aligned_monotonic(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fadd_double_aligned_monotonic:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fadd_double_aligned_monotonic:
 ; -O1:    ldxr x8, [x0]
@@ -249,10 +249,10 @@ define dso_local double @atomicrmw_fadd_double_aligned_monotonic(ptr %ptr, doubl
 
 define dso_local double @atomicrmw_fadd_double_aligned_acquire(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fadd_double_aligned_acquire:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fadd_double_aligned_acquire:
 ; -O1:    ldaxr x8, [x0]
@@ -263,10 +263,10 @@ define dso_local double @atomicrmw_fadd_double_aligned_acquire(ptr %ptr, double
 
 define dso_local double @atomicrmw_fadd_double_aligned_release(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fadd_double_aligned_release:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fadd_double_aligned_release:
 ; -O1:    ldxr x8, [x0]
@@ -277,10 +277,10 @@ define dso_local double @atomicrmw_fadd_double_aligned_release(ptr %ptr, double
 
 define dso_local double @atomicrmw_fadd_double_aligned_acq_rel(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fadd_double_aligned_acq_rel:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fadd_double_aligned_acq_rel:
 ; -O1:    ldaxr x8, [x0]
@@ -291,10 +291,10 @@ define dso_local double @atomicrmw_fadd_double_aligned_acq_rel(ptr %ptr, double
 
 define dso_local double @atomicrmw_fadd_double_aligned_seq_cst(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fadd_double_aligned_seq_cst:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fadd_double_aligned_seq_cst:
 ; -O1:    ldaxr x8, [x0]
@@ -480,10 +480,10 @@ define dso_local double @atomicrmw_fadd_double_unaligned_seq_cst(ptr %ptr, doubl
 
 define dso_local half @atomicrmw_fsub_half_aligned_monotonic(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_monotonic:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -494,10 +494,10 @@ define dso_local half @atomicrmw_fsub_half_aligned_monotonic(ptr %ptr, half %val
 
 define dso_local half @atomicrmw_fsub_half_aligned_acquire(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_acquire:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -508,10 +508,10 @@ define dso_local half @atomicrmw_fsub_half_aligned_acquire(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fsub_half_aligned_release(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_release:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -522,10 +522,10 @@ define dso_local half @atomicrmw_fsub_half_aligned_release(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fsub_half_aligned_acq_rel(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_acq_rel:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -536,10 +536,10 @@ define dso_local half @atomicrmw_fsub_half_aligned_acq_rel(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fsub_half_aligned_seq_cst(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_seq_cst:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -552,10 +552,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_monotonic(ptr %ptr, bfloa
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_monotonic:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -570,10 +570,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_acquire(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_acquire:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -588,10 +588,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_release(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_release:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -606,10 +606,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_acq_rel(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_acq_rel:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -624,10 +624,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_seq_cst:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -640,10 +640,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 
 define dso_local float @atomicrmw_fsub_float_aligned_monotonic(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_monotonic:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -654,10 +654,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_monotonic(ptr %ptr, float %
 
 define dso_local float @atomicrmw_fsub_float_aligned_acquire(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_acquire:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -668,10 +668,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_acquire(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fsub_float_aligned_release(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_release:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -682,10 +682,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_release(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fsub_float_aligned_acq_rel(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_acq_rel:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -696,10 +696,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_acq_rel(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fsub_float_aligned_seq_cst(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_seq_cst:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -710,10 +710,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_seq_cst(ptr %ptr, float %va
 
 define dso_local double @atomicrmw_fsub_double_aligned_monotonic(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_monotonic:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_monotonic:
 ; -O1:    ldxr x8, [x0]
@@ -724,10 +724,10 @@ define dso_local double @atomicrmw_fsub_double_aligned_monotonic(ptr %ptr, doubl
 
 define dso_local double @atomicrmw_fsub_double_aligned_acquire(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_acquire:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_acquire:
 ; -O1:    ldaxr x8, [x0]
@@ -738,10 +738,10 @@ define dso_local double @atomicrmw_fsub_double_aligned_acquire(ptr %ptr, double
 
 define dso_local double @atomicrmw_fsub_double_aligned_release(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_release:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_release:
 ; -O1:    ldxr x8, [x0]
@@ -752,10 +752,10 @@ define dso_local double @atomicrmw_fsub_double_aligned_release(ptr %ptr, double
 
 define dso_local double @atomicrmw_fsub_double_aligned_acq_rel(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_acq_rel:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_acq_rel:
 ; -O1:    ldaxr x8, [x0]
@@ -766,10 +766,10 @@ define dso_local double @atomicrmw_fsub_double_aligned_acq_rel(ptr %ptr, double
 
 define dso_local double @atomicrmw_fsub_double_aligned_seq_cst(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_seq_cst:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_seq_cst:
 ; -O1:    ldaxr x8, [x0]
@@ -955,10 +955,10 @@ define dso_local double @atomicrmw_fsub_double_unaligned_seq_cst(ptr %ptr, doubl
 
 define dso_local half @atomicrmw_fmax_half_aligned_monotonic(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmax_half_aligned_monotonic:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_half_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -969,10 +969,10 @@ define dso_local half @atomicrmw_fmax_half_aligned_monotonic(ptr %ptr, half %val
 
 define dso_local half @atomicrmw_fmax_half_aligned_acquire(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmax_half_aligned_acquire:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_half_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -983,10 +983,10 @@ define dso_local half @atomicrmw_fmax_half_aligned_acquire(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fmax_half_aligned_release(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmax_half_aligned_release:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_half_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -997,10 +997,10 @@ define dso_local half @atomicrmw_fmax_half_aligned_release(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fmax_half_aligned_acq_rel(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmax_half_aligned_acq_rel:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_half_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -1011,10 +1011,10 @@ define dso_local half @atomicrmw_fmax_half_aligned_acq_rel(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fmax_half_aligned_seq_cst(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmax_half_aligned_seq_cst:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_half_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -1027,10 +1027,10 @@ define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_monotonic(ptr %ptr, bfloa
 ; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_monotonic:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -1045,10 +1045,10 @@ define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_acquire(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_acquire:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -1063,10 +1063,10 @@ define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_release(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_release:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -1081,10 +1081,10 @@ define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_acq_rel(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_acq_rel:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -1099,10 +1099,10 @@ define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_seq_cst:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -1115,10 +1115,10 @@ define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 
 define dso_local float @atomicrmw_fmax_float_aligned_monotonic(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmax_float_aligned_monotonic:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fmax_float_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -1129,10 +1129,10 @@ define dso_local float @atomicrmw_fmax_float_aligned_monotonic(ptr %ptr, float %
 
 define dso_local float @atomicrmw_fmax_float_aligned_acquire(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmax_float_aligned_acquire:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fmax_float_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -1143,10 +1143,10 @@ define dso_local float @atomicrmw_fmax_float_aligned_acquire(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fmax_float_aligned_release(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmax_float_aligned_release:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fmax_float_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -1157,10 +1157,10 @@ define dso_local float @atomicrmw_fmax_float_aligned_release(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fmax_float_aligned_acq_rel(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmax_float_aligned_acq_rel:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fmax_float_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -1171,10 +1171,10 @@ define dso_local float @atomicrmw_fmax_float_aligned_acq_rel(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fmax_float_aligned_seq_cst(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmax_float_aligned_seq_cst:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fmax_float_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -1185,10 +1185,10 @@ define dso_local float @atomicrmw_fmax_float_aligned_seq_cst(ptr %ptr, float %va
 
 define dso_local double @atomicrmw_fmax_double_aligned_monotonic(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmax_double_aligned_monotonic:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fmax_double_aligned_monotonic:
 ; -O1:    ldxr x8, [x0]
@@ -1199,10 +1199,10 @@ define dso_local double @atomicrmw_fmax_double_aligned_monotonic(ptr %ptr, doubl
 
 define dso_local double @atomicrmw_fmax_double_aligned_acquire(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmax_double_aligned_acquire:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fmax_double_aligned_acquire:
 ; -O1:    ldaxr x8, [x0]
@@ -1213,10 +1213,10 @@ define dso_local double @atomicrmw_fmax_double_aligned_acquire(ptr %ptr, double
 
 define dso_local double @atomicrmw_fmax_double_aligned_release(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmax_double_aligned_release:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fmax_double_aligned_release:
 ; -O1:    ldxr x8, [x0]
@@ -1227,10 +1227,10 @@ define dso_local double @atomicrmw_fmax_double_aligned_release(ptr %ptr, double
 
 define dso_local double @atomicrmw_fmax_double_aligned_acq_rel(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmax_double_aligned_acq_rel:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fmax_double_aligned_acq_rel:
 ; -O1:    ldaxr x8, [x0]
@@ -1241,10 +1241,10 @@ define dso_local double @atomicrmw_fmax_double_aligned_acq_rel(ptr %ptr, double
 
 define dso_local double @atomicrmw_fmax_double_aligned_seq_cst(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmax_double_aligned_seq_cst:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fmax_double_aligned_seq_cst:
 ; -O1:    ldaxr x8, [x0]
@@ -1430,10 +1430,10 @@ define dso_local double @atomicrmw_fmax_double_unaligned_seq_cst(ptr %ptr, doubl
 
 define dso_local half @atomicrmw_fmin_half_aligned_monotonic(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmin_half_aligned_monotonic:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_half_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -1444,10 +1444,10 @@ define dso_local half @atomicrmw_fmin_half_aligned_monotonic(ptr %ptr, half %val
 
 define dso_local half @atomicrmw_fmin_half_aligned_acquire(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmin_half_aligned_acquire:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_half_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -1458,10 +1458,10 @@ define dso_local half @atomicrmw_fmin_half_aligned_acquire(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fmin_half_aligned_release(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmin_half_aligned_release:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_half_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -1472,10 +1472,10 @@ define dso_local half @atomicrmw_fmin_half_aligned_release(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fmin_half_aligned_acq_rel(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmin_half_aligned_acq_rel:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_half_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -1486,10 +1486,10 @@ define dso_local half @atomicrmw_fmin_half_aligned_acq_rel(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fmin_half_aligned_seq_cst(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmin_half_aligned_seq_cst:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_half_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -1502,10 +1502,10 @@ define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_monotonic(ptr %ptr, bfloa
 ; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_monotonic:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -1520,10 +1520,10 @@ define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_acquire(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_acquire:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -1538,10 +1538,10 @@ define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_release(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_release:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -1556,10 +1556,10 @@ define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_acq_rel(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_acq_rel:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -1574,10 +1574,10 @@ define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_seq_cst:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -1590,10 +1590,10 @@ define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 
 define dso_local float @atomicrmw_fmin_float_aligned_monotonic(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmin_float_aligned_monotonic:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fmin_float_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -1604,10 +1604,10 @@ define dso_local float @atomicrmw_fmin_float_aligned_monotonic(ptr %ptr, float %
 
 define dso_local float @atomicrmw_fmin_float_aligned_acquire(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmin_float_aligned_acquire:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fmin_float_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -1618,10 +1618,10 @@ define dso_local float @atomicrmw_fmin_float_aligned_acquire(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fmin_float_aligned_release(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmin_float_aligned_release:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fmin_float_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -1632,10 +1632,10 @@ define dso_local float @atomicrmw_fmin_float_aligned_release(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fmin_float_aligned_acq_rel(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmin_float_aligned_acq_rel:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fmin_float_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -1646,10 +1646,10 @@ define dso_local float @atomicrmw_fmin_float_aligned_acq_rel(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fmin_float_aligned_seq_cst(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmin_float_aligned_seq_cst:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fmin_float_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -1660,10 +1660,10 @@ define dso_local float @atomicrmw_fmin_float_aligned_seq_cst(ptr %ptr, float %va
 
 define dso_local double @atomicrmw_fmin_double_aligned_monotonic(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmin_double_aligned_monotonic:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fmin_double_aligned_monotonic:
 ; -O1:    ldxr x8, [x0]
@@ -1674,10 +1674,10 @@ define dso_local double @atomicrmw_fmin_double_aligned_monotonic(ptr %ptr, doubl
 
 define dso_local double @atomicrmw_fmin_double_aligned_acquire(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmin_double_aligned_acquire:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fmin_double_aligned_acquire:
 ; -O1:    ldaxr x8, [x0]
@@ -1688,10 +1688,10 @@ define dso_local double @atomicrmw_fmin_double_aligned_acquire(ptr %ptr, double
 
 define dso_local double @atomicrmw_fmin_double_aligned_release(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmin_double_aligned_release:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fmin_double_aligned_release:
 ; -O1:    ldxr x8, [x0]
@@ -1702,10 +1702,10 @@ define dso_local double @atomicrmw_fmin_double_aligned_release(ptr %ptr, double
 
 define dso_local double @atomicrmw_fmin_double_aligned_acq_rel(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmin_double_aligned_acq_rel:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fmin_double_aligned_acq_rel:
 ; -O1:    ldaxr x8, [x0]
@@ -1716,10 +1716,10 @@ define dso_local double @atomicrmw_fmin_double_aligned_acq_rel(ptr %ptr, double
 
 define dso_local double @atomicrmw_fmin_double_aligned_seq_cst(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmin_double_aligned_seq_cst:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fmin_double_aligned_seq_cst:
 ; -O1:    ldaxr x8, [x0]
@@ -1905,10 +1905,10 @@ define dso_local double @atomicrmw_fmin_double_unaligned_seq_cst(ptr %ptr, doubl
 
 define dso_local half @atomicrmw_fmaximum_half_aligned_monotonic(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_half_aligned_monotonic:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_half_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -1919,10 +1919,10 @@ define dso_local half @atomicrmw_fmaximum_half_aligned_monotonic(ptr %ptr, half
 
 define dso_local half @atomicrmw_fmaximum_half_aligned_acquire(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_half_aligned_acquire:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_half_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -1933,10 +1933,10 @@ define dso_local half @atomicrmw_fmaximum_half_aligned_acquire(ptr %ptr, half %v
 
 define dso_local half @atomicrmw_fmaximum_half_aligned_release(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_half_aligned_release:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_half_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -1947,10 +1947,10 @@ define dso_local half @atomicrmw_fmaximum_half_aligned_release(ptr %ptr, half %v
 
 define dso_local half @atomicrmw_fmaximum_half_aligned_acq_rel(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_half_aligned_acq_rel:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_half_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -1961,10 +1961,10 @@ define dso_local half @atomicrmw_fmaximum_half_aligned_acq_rel(ptr %ptr, half %v
 
 define dso_local half @atomicrmw_fmaximum_half_aligned_seq_cst(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_half_aligned_seq_cst:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_half_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -1977,10 +1977,10 @@ define dso_local bfloat @atomicrmw_fmaximum_bfloat_aligned_monotonic(ptr %ptr, b
 ; -O0-LABEL: atomicrmw_fmaximum_bfloat_aligned_monotonic:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_bfloat_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -1995,10 +1995,10 @@ define dso_local bfloat @atomicrmw_fmaximum_bfloat_aligned_acquire(ptr %ptr, bfl
 ; -O0-LABEL: atomicrmw_fmaximum_bfloat_aligned_acquire:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_bfloat_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -2013,10 +2013,10 @@ define dso_local bfloat @atomicrmw_fmaximum_bfloat_aligned_release(ptr %ptr, bfl
 ; -O0-LABEL: atomicrmw_fmaximum_bfloat_aligned_release:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_bfloat_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -2031,10 +2031,10 @@ define dso_local bfloat @atomicrmw_fmaximum_bfloat_aligned_acq_rel(ptr %ptr, bfl
 ; -O0-LABEL: atomicrmw_fmaximum_bfloat_aligned_acq_rel:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_bfloat_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -2049,10 +2049,10 @@ define dso_local bfloat @atomicrmw_fmaximum_bfloat_aligned_seq_cst(ptr %ptr, bfl
 ; -O0-LABEL: atomicrmw_fmaximum_bfloat_aligned_seq_cst:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_bfloat_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -2065,10 +2065,10 @@ define dso_local bfloat @atomicrmw_fmaximum_bfloat_aligned_seq_cst(ptr %ptr, bfl
 
 define dso_local float @atomicrmw_fmaximum_float_aligned_monotonic(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_float_aligned_monotonic:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_float_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -2079,10 +2079,10 @@ define dso_local float @atomicrmw_fmaximum_float_aligned_monotonic(ptr %ptr, flo
 
 define dso_local float @atomicrmw_fmaximum_float_aligned_acquire(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_float_aligned_acquire:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_float_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -2093,10 +2093,10 @@ define dso_local float @atomicrmw_fmaximum_float_aligned_acquire(ptr %ptr, float
 
 define dso_local float @atomicrmw_fmaximum_float_aligned_release(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_float_aligned_release:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_float_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -2107,10 +2107,10 @@ define dso_local float @atomicrmw_fmaximum_float_aligned_release(ptr %ptr, float
 
 define dso_local float @atomicrmw_fmaximum_float_aligned_acq_rel(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_float_aligned_acq_rel:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_float_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -2121,10 +2121,10 @@ define dso_local float @atomicrmw_fmaximum_float_aligned_acq_rel(ptr %ptr, float
 
 define dso_local float @atomicrmw_fmaximum_float_aligned_seq_cst(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_float_aligned_seq_cst:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_float_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -2135,10 +2135,10 @@ define dso_local float @atomicrmw_fmaximum_float_aligned_seq_cst(ptr %ptr, float
 
 define dso_local double @atomicrmw_fmaximum_double_aligned_monotonic(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_double_aligned_monotonic:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_double_aligned_monotonic:
 ; -O1:    ldxr x8, [x0]
@@ -2149,10 +2149,10 @@ define dso_local double @atomicrmw_fmaximum_double_aligned_monotonic(ptr %ptr, d
 
 define dso_local double @atomicrmw_fmaximum_double_aligned_acquire(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_double_aligned_acquire:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_double_aligned_acquire:
 ; -O1:    ldaxr x8, [x0]
@@ -2163,10 +2163,10 @@ define dso_local double @atomicrmw_fmaximum_double_aligned_acquire(ptr %ptr, dou
 
 define dso_local double @atomicrmw_fmaximum_double_aligned_release(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_double_aligned_release:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_double_aligned_release:
 ; -O1:    ldxr x8, [x0]
@@ -2177,10 +2177,10 @@ define dso_local double @atomicrmw_fmaximum_double_aligned_release(ptr %ptr, dou
 
 define dso_local double @atomicrmw_fmaximum_double_aligned_acq_rel(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_double_aligned_acq_rel:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_double_aligned_acq_rel:
 ; -O1:    ldaxr x8, [x0]
@@ -2191,10 +2191,10 @@ define dso_local double @atomicrmw_fmaximum_double_aligned_acq_rel(ptr %ptr, dou
 
 define dso_local double @atomicrmw_fmaximum_double_aligned_seq_cst(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_double_aligned_seq_cst:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_double_aligned_seq_cst:
 ; -O1:    ldaxr x8, [x0]
@@ -2380,10 +2380,10 @@ define dso_local double @atomicrmw_fmaximum_double_unaligned_seq_cst(ptr %ptr, d
 
 define dso_local half @atomicrmw_fminimum_half_aligned_monotonic(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fminimum_half_aligned_monotonic:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_half_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -2394,10 +2394,10 @@ define dso_local half @atomicrmw_fminimum_half_aligned_monotonic(ptr %ptr, half
 
 define dso_local half @atomicrmw_fminimum_half_aligned_acquire(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fminimum_half_aligned_acquire:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_half_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -2408,10 +2408,10 @@ define dso_local half @atomicrmw_fminimum_half_aligned_acquire(ptr %ptr, half %v
 
 define dso_local half @atomicrmw_fminimum_half_aligned_release(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fminimum_half_aligned_release:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_half_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -2422,10 +2422,10 @@ define dso_local half @atomicrmw_fminimum_half_aligned_release(ptr %ptr, half %v
 
 define dso_local half @atomicrmw_fminimum_half_aligned_acq_rel(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fminimum_half_aligned_acq_rel:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_half_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -2436,10 +2436,10 @@ define dso_local half @atomicrmw_fminimum_half_aligned_acq_rel(ptr %ptr, half %v
 
 define dso_local half @atomicrmw_fminimum_half_aligned_seq_cst(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fminimum_half_aligned_seq_cst:
-; -O0:    ldaxrh w0, [x9]
-; -O0:    cmp w0, w10, uxth
-; -O0:    stlxrh w8, w11, [x9]
-; -O0:    subs w8, w8, w0, uxth
+; -O0:    ldaxrh w0, [x8]
+; -O0:    cmp w0, w9, uxth
+; -O0:    stlxrh wzr, w10, [x8]
+; -O0:    cmp w8, w0, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_half_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -2452,10 +2452,10 @@ define dso_local bfloat @atomicrmw_fminimum_bfloat_aligned_monotonic(ptr %ptr, b
 ; -O0-LABEL: atomicrmw_fminimum_bfloat_aligned_monotonic:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_bfloat_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -2470,10 +2470,10 @@ define dso_local bfloat @atomicrmw_fminimum_bfloat_aligned_acquire(ptr %ptr, bfl
 ; -O0-LABEL: atomicrmw_fminimum_bfloat_aligned_acquire:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_bfloat_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -2488,10 +2488,10 @@ define dso_local bfloat @atomicrmw_fminimum_bfloat_aligned_release(ptr %ptr, bfl
 ; -O0-LABEL: atomicrmw_fminimum_bfloat_aligned_release:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_bfloat_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -2506,10 +2506,10 @@ define dso_local bfloat @atomicrmw_fminimum_bfloat_aligned_acq_rel(ptr %ptr, bfl
 ; -O0-LABEL: atomicrmw_fminimum_bfloat_aligned_acq_rel:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_bfloat_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -2524,10 +2524,10 @@ define dso_local bfloat @atomicrmw_fminimum_bfloat_aligned_seq_cst(ptr %ptr, bfl
 ; -O0-LABEL: atomicrmw_fminimum_bfloat_aligned_seq_cst:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_bfloat_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -2540,10 +2540,10 @@ define dso_local bfloat @atomicrmw_fminimum_bfloat_aligned_seq_cst(ptr %ptr, bfl
 
 define dso_local float @atomicrmw_fminimum_float_aligned_monotonic(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fminimum_float_aligned_monotonic:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fminimum_float_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -2554,10 +2554,10 @@ define dso_local float @atomicrmw_fminimum_float_aligned_monotonic(ptr %ptr, flo
 
 define dso_local float @atomicrmw_fminimum_float_aligned_acquire(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fminimum_float_aligned_acquire:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fminimum_float_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -2568,10 +2568,10 @@ define dso_local float @atomicrmw_fminimum_float_aligned_acquire(ptr %ptr, float
 
 define dso_local float @atomicrmw_fminimum_float_aligned_release(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fminimum_float_aligned_release:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fminimum_float_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -2582,10 +2582,10 @@ define dso_local float @atomicrmw_fminimum_float_aligned_release(ptr %ptr, float
 
 define dso_local float @atomicrmw_fminimum_float_aligned_acq_rel(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fminimum_float_aligned_acq_rel:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fminimum_float_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -2596,10 +2596,10 @@ define dso_local float @atomicrmw_fminimum_float_aligned_acq_rel(ptr %ptr, float
 
 define dso_local float @atomicrmw_fminimum_float_aligned_seq_cst(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fminimum_float_aligned_seq_cst:
-; -O0:    ldaxr w0, [x9]
-; -O0:    cmp w0, w10
-; -O0:    stlxr w8, w11, [x9]
-; -O0:    subs w8, w0, w8
+; -O0:    ldaxr w0, [x8]
+; -O0:    cmp w0, w9
+; -O0:    stlxr wzr, w10, [x8]
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_fminimum_float_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -2610,10 +2610,10 @@ define dso_local float @atomicrmw_fminimum_float_aligned_seq_cst(ptr %ptr, float
 
 define dso_local double @atomicrmw_fminimum_double_aligned_monotonic(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fminimum_double_aligned_monotonic:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fminimum_double_aligned_monotonic:
 ; -O1:    ldxr x8, [x0]
@@ -2624,10 +2624,10 @@ define dso_local double @atomicrmw_fminimum_double_aligned_monotonic(ptr %ptr, d
 
 define dso_local double @atomicrmw_fminimum_double_aligned_acquire(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fminimum_double_aligned_acquire:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fminimum_double_aligned_acquire:
 ; -O1:    ldaxr x8, [x0]
@@ -2638,10 +2638,10 @@ define dso_local double @atomicrmw_fminimum_double_aligned_acquire(ptr %ptr, dou
 
 define dso_local double @atomicrmw_fminimum_double_aligned_release(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fminimum_double_aligned_release:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fminimum_double_aligned_release:
 ; -O1:    ldxr x8, [x0]
@@ -2652,10 +2652,10 @@ define dso_local double @atomicrmw_fminimum_double_aligned_release(ptr %ptr, dou
 
 define dso_local double @atomicrmw_fminimum_double_aligned_acq_rel(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fminimum_double_aligned_acq_rel:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fminimum_double_aligned_acq_rel:
 ; -O1:    ldaxr x8, [x0]
@@ -2666,10 +2666,10 @@ define dso_local double @atomicrmw_fminimum_double_aligned_acq_rel(ptr %ptr, dou
 
 define dso_local double @atomicrmw_fminimum_double_aligned_seq_cst(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fminimum_double_aligned_seq_cst:
-; -O0:    ldaxr x0, [x9]
-; -O0:    cmp x0, x10
-; -O0:    stlxr w8, x11, [x9]
-; -O0:    subs x8, x0, x8
+; -O0:    ldaxr x0, [x8]
+; -O0:    cmp x0, x9
+; -O0:    stlxr wzr, x10, [x8]
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_fminimum_double_aligned_seq_cst:
 ; -O1:    ldaxr x8, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-lse2.ll
index 46dce540cafcd..2343cfc7e11b8 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-lse2.ll
@@ -6,8 +6,8 @@
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -22,8 +22,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -37,8 +37,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -53,8 +53,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -68,8 +68,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -84,8 +84,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -99,8 +99,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -115,8 +115,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -130,8 +130,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -146,8 +146,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -161,8 +161,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -177,8 +177,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -192,8 +192,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -208,8 +208,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -223,8 +223,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -239,8 +239,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -254,8 +254,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -270,8 +270,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -285,8 +285,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -301,8 +301,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -316,8 +316,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -332,8 +332,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -347,8 +347,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -363,8 +363,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -378,8 +378,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -394,8 +394,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -409,8 +409,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -425,8 +425,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -440,8 +440,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -456,8 +456,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -471,8 +471,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -487,8 +487,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxrh w8, [x2]
@@ -502,8 +502,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -518,8 +518,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -533,8 +533,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -549,8 +549,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -564,8 +564,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -580,8 +580,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -595,8 +595,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -611,8 +611,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -626,8 +626,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -642,8 +642,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -657,8 +657,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -673,8 +673,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
 ; -O1:    ldxrh w8, [x2]
@@ -688,8 +688,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -704,8 +704,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -719,8 +719,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -735,8 +735,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -750,8 +750,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -766,8 +766,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -781,8 +781,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -797,8 +797,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -812,8 +812,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -828,8 +828,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -843,8 +843,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -859,8 +859,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -874,8 +874,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -890,8 +890,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -905,8 +905,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -921,8 +921,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -936,8 +936,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i1
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
 ; -O1:    ldxr w0, [x2]
@@ -951,8 +951,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxr w8, [x2]
@@ -966,8 +966,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -981,8 +981,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -996,8 +996,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1011,8 +1011,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1026,8 +1026,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1041,8 +1041,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1056,8 +1056,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1071,8 +1071,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1086,8 +1086,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1101,8 +1101,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1116,8 +1116,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic:
 ; -O1:    ldxr w0, [x2]
@@ -1131,8 +1131,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
 ; -O1:    ldxr w8, [x2]
@@ -1146,8 +1146,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1161,8 +1161,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1176,8 +1176,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1191,8 +1191,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1206,8 +1206,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1221,8 +1221,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1236,8 +1236,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1251,8 +1251,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1266,8 +1266,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1281,8 +1281,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1296,8 +1296,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1311,8 +1311,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1326,8 +1326,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1341,8 +1341,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1356,8 +1356,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1371,8 +1371,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1386,8 +1386,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i3
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
 ; -O1:    ldxr x0, [x2]
@@ -1401,8 +1401,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxr x8, [x2]
@@ -1416,8 +1416,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1431,8 +1431,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1446,8 +1446,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1461,8 +1461,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1476,8 +1476,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1491,8 +1491,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1506,8 +1506,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1521,8 +1521,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1536,8 +1536,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1551,8 +1551,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1566,8 +1566,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic:
 ; -O1:    ldxr x0, [x2]
@@ -1581,8 +1581,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
 ; -O1:    ldxr x8, [x2]
@@ -1596,8 +1596,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1611,8 +1611,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1626,8 +1626,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1641,8 +1641,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1656,8 +1656,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1671,8 +1671,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1686,8 +1686,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1701,8 +1701,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1716,8 +1716,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1731,8 +1731,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1746,8 +1746,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1761,8 +1761,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1776,8 +1776,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1791,8 +1791,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1806,8 +1806,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1821,8 +1821,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -2406,8 +2406,8 @@ define dso_local i128 @cmpxchg_i128_aligned_seq_cst_seq_cst_weak(i128 %expected,
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2422,8 +2422,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -2437,8 +2437,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected,
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2453,8 +2453,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2468,8 +2468,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2484,8 +2484,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2499,8 +2499,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2515,8 +2515,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2530,8 +2530,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2546,8 +2546,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2561,8 +2561,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2577,8 +2577,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2592,8 +2592,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2608,8 +2608,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -2623,8 +2623,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2639,8 +2639,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2654,8 +2654,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2670,8 +2670,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2685,8 +2685,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2701,8 +2701,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2716,8 +2716,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2732,8 +2732,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2747,8 +2747,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2763,8 +2763,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2778,8 +2778,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2794,8 +2794,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2809,8 +2809,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2825,8 +2825,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2840,8 +2840,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2856,8 +2856,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc.ll
index e44ce9025bcb0..6fecbe150b6b7 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc.ll
@@ -6,8 +6,8 @@
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -22,8 +22,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -37,8 +37,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -53,8 +53,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -68,8 +68,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -84,8 +84,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -99,8 +99,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -115,8 +115,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -130,8 +130,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -146,8 +146,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -161,8 +161,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -177,8 +177,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -192,8 +192,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -208,8 +208,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -223,8 +223,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -239,8 +239,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -254,8 +254,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -270,8 +270,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -285,8 +285,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -301,8 +301,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -316,8 +316,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -332,8 +332,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -347,8 +347,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -363,8 +363,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -378,8 +378,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -394,8 +394,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -409,8 +409,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -425,8 +425,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -440,8 +440,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -456,8 +456,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -471,8 +471,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -487,8 +487,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxrh w8, [x2]
@@ -502,8 +502,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -518,8 +518,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -533,8 +533,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -549,8 +549,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -564,8 +564,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -580,8 +580,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -595,8 +595,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -611,8 +611,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -626,8 +626,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -642,8 +642,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -657,8 +657,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -673,8 +673,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
 ; -O1:    ldxrh w8, [x2]
@@ -688,8 +688,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -704,8 +704,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -719,8 +719,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -735,8 +735,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -750,8 +750,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -766,8 +766,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -781,8 +781,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -797,8 +797,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -812,8 +812,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -828,8 +828,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -843,8 +843,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -859,8 +859,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -874,8 +874,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -890,8 +890,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -905,8 +905,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -921,8 +921,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -936,8 +936,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i1
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
 ; -O1:    ldxr w0, [x2]
@@ -951,8 +951,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxr w8, [x2]
@@ -966,8 +966,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -981,8 +981,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -996,8 +996,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1011,8 +1011,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1026,8 +1026,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1041,8 +1041,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1056,8 +1056,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1071,8 +1071,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1086,8 +1086,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1101,8 +1101,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1116,8 +1116,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic:
 ; -O1:    ldxr w0, [x2]
@@ -1131,8 +1131,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
 ; -O1:    ldxr w8, [x2]
@@ -1146,8 +1146,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1161,8 +1161,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1176,8 +1176,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1191,8 +1191,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1206,8 +1206,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1221,8 +1221,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1236,8 +1236,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1251,8 +1251,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1266,8 +1266,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1281,8 +1281,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1296,8 +1296,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1311,8 +1311,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1326,8 +1326,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1341,8 +1341,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1356,8 +1356,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1371,8 +1371,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1386,8 +1386,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i3
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
 ; -O1:    ldxr x0, [x2]
@@ -1401,8 +1401,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxr x8, [x2]
@@ -1416,8 +1416,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1431,8 +1431,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1446,8 +1446,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1461,8 +1461,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1476,8 +1476,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1491,8 +1491,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1506,8 +1506,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1521,8 +1521,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1536,8 +1536,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1551,8 +1551,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1566,8 +1566,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic:
 ; -O1:    ldxr x0, [x2]
@@ -1581,8 +1581,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
 ; -O1:    ldxr x8, [x2]
@@ -1596,8 +1596,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1611,8 +1611,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1626,8 +1626,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1641,8 +1641,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1656,8 +1656,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1671,8 +1671,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1686,8 +1686,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1701,8 +1701,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1716,8 +1716,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1731,8 +1731,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1746,8 +1746,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1761,8 +1761,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1776,8 +1776,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1791,8 +1791,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1806,8 +1806,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1821,8 +1821,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -2406,8 +2406,8 @@ define dso_local i128 @cmpxchg_i128_aligned_seq_cst_seq_cst_weak(i128 %expected,
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2422,8 +2422,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -2437,8 +2437,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected,
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2453,8 +2453,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2468,8 +2468,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2484,8 +2484,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2499,8 +2499,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2515,8 +2515,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2530,8 +2530,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2546,8 +2546,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2561,8 +2561,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2577,8 +2577,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2592,8 +2592,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2608,8 +2608,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -2623,8 +2623,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2639,8 +2639,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2654,8 +2654,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2670,8 +2670,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2685,8 +2685,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2701,8 +2701,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2716,8 +2716,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2732,8 +2732,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2747,8 +2747,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2763,8 +2763,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2778,8 +2778,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2794,8 +2794,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2809,8 +2809,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2825,8 +2825,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2840,8 +2840,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2856,8 +2856,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc3.ll
index a2b995074c81d..f21298cd3b745 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc3.ll
@@ -6,8 +6,8 @@
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -22,8 +22,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -37,8 +37,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -53,8 +53,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -68,8 +68,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -84,8 +84,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -99,8 +99,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -115,8 +115,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -130,8 +130,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -146,8 +146,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -161,8 +161,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -177,8 +177,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -192,8 +192,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -208,8 +208,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -223,8 +223,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -239,8 +239,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -254,8 +254,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -270,8 +270,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -285,8 +285,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -301,8 +301,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -316,8 +316,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -332,8 +332,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -347,8 +347,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -363,8 +363,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -378,8 +378,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -394,8 +394,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -409,8 +409,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -425,8 +425,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -440,8 +440,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -456,8 +456,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -471,8 +471,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -487,8 +487,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxrh w8, [x2]
@@ -502,8 +502,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -518,8 +518,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -533,8 +533,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -549,8 +549,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -564,8 +564,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -580,8 +580,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -595,8 +595,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -611,8 +611,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -626,8 +626,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -642,8 +642,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -657,8 +657,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -673,8 +673,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
 ; -O1:    ldxrh w8, [x2]
@@ -688,8 +688,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -704,8 +704,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -719,8 +719,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -735,8 +735,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -750,8 +750,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -766,8 +766,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -781,8 +781,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -797,8 +797,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -812,8 +812,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -828,8 +828,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -843,8 +843,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -859,8 +859,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -874,8 +874,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -890,8 +890,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -905,8 +905,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -921,8 +921,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -936,8 +936,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i1
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
 ; -O1:    ldxr w0, [x2]
@@ -951,8 +951,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxr w8, [x2]
@@ -966,8 +966,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -981,8 +981,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -996,8 +996,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1011,8 +1011,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1026,8 +1026,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1041,8 +1041,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1056,8 +1056,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1071,8 +1071,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1086,8 +1086,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1101,8 +1101,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1116,8 +1116,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic:
 ; -O1:    ldxr w0, [x2]
@@ -1131,8 +1131,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
 ; -O1:    ldxr w8, [x2]
@@ -1146,8 +1146,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1161,8 +1161,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1176,8 +1176,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1191,8 +1191,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1206,8 +1206,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1221,8 +1221,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1236,8 +1236,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1251,8 +1251,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1266,8 +1266,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1281,8 +1281,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1296,8 +1296,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1311,8 +1311,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1326,8 +1326,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1341,8 +1341,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1356,8 +1356,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1371,8 +1371,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1386,8 +1386,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i3
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
 ; -O1:    ldxr x0, [x2]
@@ -1401,8 +1401,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxr x8, [x2]
@@ -1416,8 +1416,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1431,8 +1431,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1446,8 +1446,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1461,8 +1461,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1476,8 +1476,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1491,8 +1491,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1506,8 +1506,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1521,8 +1521,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1536,8 +1536,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1551,8 +1551,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1566,8 +1566,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic:
 ; -O1:    ldxr x0, [x2]
@@ -1581,8 +1581,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
 ; -O1:    ldxr x8, [x2]
@@ -1596,8 +1596,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1611,8 +1611,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1626,8 +1626,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1641,8 +1641,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1656,8 +1656,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1671,8 +1671,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1686,8 +1686,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1701,8 +1701,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1716,8 +1716,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1731,8 +1731,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1746,8 +1746,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1761,8 +1761,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1776,8 +1776,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1791,8 +1791,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1806,8 +1806,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1821,8 +1821,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -2406,8 +2406,8 @@ define dso_local i128 @cmpxchg_i128_aligned_seq_cst_seq_cst_weak(i128 %expected,
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2422,8 +2422,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -2437,8 +2437,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected,
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2453,8 +2453,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2468,8 +2468,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2484,8 +2484,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2499,8 +2499,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2515,8 +2515,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2530,8 +2530,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2546,8 +2546,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2561,8 +2561,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2577,8 +2577,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2592,8 +2592,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2608,8 +2608,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -2623,8 +2623,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2639,8 +2639,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2654,8 +2654,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2670,8 +2670,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2685,8 +2685,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2701,8 +2701,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2716,8 +2716,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2732,8 +2732,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2747,8 +2747,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2763,8 +2763,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2778,8 +2778,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2794,8 +2794,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2809,8 +2809,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2825,8 +2825,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2840,8 +2840,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2856,8 +2856,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-v8a.ll
index 11f95812655fb..9a4bf4116167d 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-v8a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-v8a.ll
@@ -6,8 +6,8 @@
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -22,8 +22,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -37,8 +37,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -53,8 +53,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -68,8 +68,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -84,8 +84,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -99,8 +99,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -115,8 +115,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -130,8 +130,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -146,8 +146,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -161,8 +161,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -177,8 +177,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -192,8 +192,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -208,8 +208,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -223,8 +223,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -239,8 +239,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -254,8 +254,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -270,8 +270,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -285,8 +285,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -301,8 +301,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -316,8 +316,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -332,8 +332,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -347,8 +347,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -363,8 +363,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -378,8 +378,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -394,8 +394,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -409,8 +409,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -425,8 +425,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -440,8 +440,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -456,8 +456,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -471,8 +471,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -487,8 +487,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxrh w8, [x2]
@@ -502,8 +502,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -518,8 +518,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -533,8 +533,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -549,8 +549,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -564,8 +564,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -580,8 +580,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -595,8 +595,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -611,8 +611,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -626,8 +626,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -642,8 +642,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -657,8 +657,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -673,8 +673,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
 ; -O1:    ldxrh w8, [x2]
@@ -688,8 +688,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -704,8 +704,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -719,8 +719,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -735,8 +735,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -750,8 +750,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -766,8 +766,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -781,8 +781,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -797,8 +797,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -812,8 +812,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -828,8 +828,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -843,8 +843,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -859,8 +859,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -874,8 +874,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -890,8 +890,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -905,8 +905,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -921,8 +921,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -936,8 +936,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i1
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
 ; -O1:    ldxr w0, [x2]
@@ -951,8 +951,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxr w8, [x2]
@@ -966,8 +966,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -981,8 +981,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -996,8 +996,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1011,8 +1011,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1026,8 +1026,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1041,8 +1041,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1056,8 +1056,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1071,8 +1071,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1086,8 +1086,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1101,8 +1101,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1116,8 +1116,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic:
 ; -O1:    ldxr w0, [x2]
@@ -1131,8 +1131,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
 ; -O1:    ldxr w8, [x2]
@@ -1146,8 +1146,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1161,8 +1161,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1176,8 +1176,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1191,8 +1191,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1206,8 +1206,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1221,8 +1221,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1236,8 +1236,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1251,8 +1251,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1266,8 +1266,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1281,8 +1281,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1296,8 +1296,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1311,8 +1311,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1326,8 +1326,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1341,8 +1341,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1356,8 +1356,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1371,8 +1371,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1386,8 +1386,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i3
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
 ; -O1:    ldxr x0, [x2]
@@ -1401,8 +1401,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxr x8, [x2]
@@ -1416,8 +1416,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1431,8 +1431,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1446,8 +1446,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1461,8 +1461,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1476,8 +1476,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1491,8 +1491,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1506,8 +1506,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1521,8 +1521,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1536,8 +1536,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1551,8 +1551,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1566,8 +1566,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic:
 ; -O1:    ldxr x0, [x2]
@@ -1581,8 +1581,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
 ; -O1:    ldxr x8, [x2]
@@ -1596,8 +1596,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1611,8 +1611,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1626,8 +1626,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1641,8 +1641,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1656,8 +1656,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1671,8 +1671,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1686,8 +1686,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1701,8 +1701,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1716,8 +1716,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1731,8 +1731,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1746,8 +1746,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1761,8 +1761,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1776,8 +1776,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1791,8 +1791,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1806,8 +1806,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1821,8 +1821,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -2406,8 +2406,8 @@ define dso_local i128 @cmpxchg_i128_aligned_seq_cst_seq_cst_weak(i128 %expected,
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2422,8 +2422,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -2437,8 +2437,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected,
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2453,8 +2453,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2468,8 +2468,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2484,8 +2484,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2499,8 +2499,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2515,8 +2515,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2530,8 +2530,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2546,8 +2546,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2561,8 +2561,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2577,8 +2577,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2592,8 +2592,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2608,8 +2608,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -2623,8 +2623,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2639,8 +2639,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2654,8 +2654,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2670,8 +2670,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2685,8 +2685,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2701,8 +2701,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2716,8 +2716,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2732,8 +2732,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2747,8 +2747,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2763,8 +2763,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2778,8 +2778,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2794,8 +2794,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2809,8 +2809,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2825,8 +2825,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2840,8 +2840,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2856,8 +2856,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-outline_atomics.ll
index c1c5c53aa7df2..63ec906ebde4f 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-outline_atomics.ll
@@ -118,7 +118,7 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
 define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
 ; -O0-LABEL: store_atomic_i128_aligned_unordered:
 ; -O0:    bl __aarch64_cas16_relax
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_unordered:
@@ -131,7 +131,7 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
 define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
 ; -O0-LABEL: store_atomic_i128_aligned_monotonic:
 ; -O0:    bl __aarch64_cas16_relax
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_monotonic:
@@ -144,7 +144,7 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
 define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
 ; -O0-LABEL: store_atomic_i128_aligned_release:
 ; -O0:    bl __aarch64_cas16_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_release:
@@ -157,7 +157,7 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
 define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
 ; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc.ll
index d1047d84e2956..01cb09b738aa1 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc.ll
@@ -122,7 +122,7 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_unordered:
@@ -139,7 +139,7 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_monotonic:
@@ -156,7 +156,7 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_release:
@@ -173,7 +173,7 @@ define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8_1a.ll
index 6a51222e31092..2af43e58f5ee7 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8_1a.ll
@@ -118,7 +118,7 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
 define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
 ; -O0-LABEL: store_atomic_i128_aligned_unordered:
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_unordered:
@@ -133,7 +133,7 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
 define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
 ; -O0-LABEL: store_atomic_i128_aligned_monotonic:
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_monotonic:
@@ -148,7 +148,7 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
 define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
 ; -O0-LABEL: store_atomic_i128_aligned_release:
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_release:
@@ -163,7 +163,7 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
 define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
 ; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8a.ll
index 1a79c73355143..f06202ff7dfda 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8a.ll
@@ -122,7 +122,7 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_unordered:
@@ -139,7 +139,7 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_monotonic:
@@ -156,7 +156,7 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_release:
@@ -173,7 +173,7 @@ define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll
index ea6ebe08936f5..c27f3d67c196b 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll
@@ -5,10 +5,10 @@
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
 ; -O1:    ldxrb w0, [x8]
@@ -19,10 +19,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_acquire:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
 ; -O1:    ldaxrb w0, [x8]
@@ -33,10 +33,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_release:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
 ; -O1:    ldxrb w0, [x8]
@@ -47,10 +47,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w0, [x8]
@@ -61,10 +61,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w0, [x8]
@@ -75,10 +75,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
 ; -O1:    ldxrh w0, [x8]
@@ -89,10 +89,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_acquire:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
 ; -O1:    ldaxrh w0, [x8]
@@ -103,10 +103,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_release:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
 ; -O1:    ldxrh w0, [x8]
@@ -117,10 +117,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w0, [x8]
@@ -131,10 +131,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w0, [x8]
@@ -145,10 +145,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_monotonic:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_monotonic:
 ; -O1:    ldxr w0, [x8]
@@ -159,10 +159,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_acquire:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_acquire:
 ; -O1:    ldaxr w0, [x8]
@@ -173,10 +173,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_release:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_release:
 ; -O1:    ldxr w0, [x8]
@@ -187,10 +187,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_acq_rel:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_acq_rel:
 ; -O1:    ldaxr w0, [x8]
@@ -201,10 +201,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_seq_cst:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_seq_cst:
 ; -O1:    ldaxr w0, [x8]
@@ -215,10 +215,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_monotonic:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -229,10 +229,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_acquire:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -243,10 +243,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_release:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -257,10 +257,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_acq_rel:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -271,10 +271,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_seq_cst:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -290,7 +290,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
@@ -307,7 +307,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire:
@@ -324,7 +324,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_release:
@@ -341,7 +341,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
@@ -358,7 +358,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
@@ -370,10 +370,10 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
 ; -O1:    ldxrb w0, [x8]
@@ -384,10 +384,10 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
 ; -O1:    ldaxrb w0, [x8]
@@ -398,10 +398,10 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_release:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
 ; -O1:    ldxrb w0, [x8]
@@ -412,10 +412,10 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w0, [x8]
@@ -426,10 +426,10 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w0, [x8]
@@ -580,11 +580,11 @@ define dso_local i128 @atomicrmw_xchg_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_add_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_monotonic:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -596,11 +596,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_acquire:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -612,11 +612,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_release:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -628,11 +628,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_acq_rel:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -644,11 +644,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_seq_cst:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -660,11 +660,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_monotonic:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -676,11 +676,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_add_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_acquire:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -692,11 +692,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_release:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -708,11 +708,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_acq_rel:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -724,11 +724,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_seq_cst:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -740,11 +740,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_monotonic:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -756,11 +756,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_add_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_acquire:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -772,11 +772,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_release:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -788,11 +788,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_acq_rel:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -804,11 +804,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_seq_cst:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -820,11 +820,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_monotonic:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -836,11 +836,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_add_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_acquire:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -852,11 +852,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_release:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -868,11 +868,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_acq_rel:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -884,11 +884,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_seq_cst:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -906,7 +906,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
@@ -925,7 +925,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
@@ -944,7 +944,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
@@ -963,7 +963,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
@@ -982,7 +982,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
@@ -995,11 +995,11 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_monotonic:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1011,11 +1011,11 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_acquire:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1027,11 +1027,11 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_release:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1043,11 +1043,11 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_acq_rel:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1059,11 +1059,11 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_seq_cst:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -1320,11 +1320,11 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_monotonic:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1336,11 +1336,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_acquire:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1352,11 +1352,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_release:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1368,11 +1368,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_acq_rel:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1384,11 +1384,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_seq_cst:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -1400,11 +1400,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_monotonic:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -1416,11 +1416,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_acquire:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -1432,11 +1432,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_release:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -1448,11 +1448,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_acq_rel:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -1464,11 +1464,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_seq_cst:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -1480,11 +1480,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_monotonic:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -1496,11 +1496,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_acquire:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -1512,11 +1512,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_release:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -1528,11 +1528,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_acq_rel:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -1544,11 +1544,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_seq_cst:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -1560,11 +1560,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_monotonic:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -1576,11 +1576,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_acquire:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -1592,11 +1592,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_release:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -1608,11 +1608,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_acq_rel:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -1624,11 +1624,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_seq_cst:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -1646,7 +1646,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
@@ -1665,7 +1665,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
@@ -1684,7 +1684,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
@@ -1703,7 +1703,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
@@ -1722,7 +1722,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
@@ -1735,11 +1735,11 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_monotonic:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1751,11 +1751,11 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_acquire:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1767,11 +1767,11 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_release:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1783,11 +1783,11 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_acq_rel:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1799,11 +1799,11 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_seq_cst:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2060,11 +2060,11 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_monotonic:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2076,11 +2076,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_acquire:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2092,11 +2092,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_release:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2108,11 +2108,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_acq_rel:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2124,11 +2124,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_seq_cst:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2140,11 +2140,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_monotonic:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -2156,11 +2156,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_acquire:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -2172,11 +2172,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_release:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -2188,11 +2188,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_acq_rel:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -2204,11 +2204,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_seq_cst:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -2220,11 +2220,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_monotonic:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -2236,11 +2236,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_acquire:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -2252,11 +2252,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_release:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -2268,11 +2268,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_acq_rel:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -2284,11 +2284,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_seq_cst:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -2300,11 +2300,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_monotonic:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -2316,11 +2316,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_acquire:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -2332,11 +2332,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_release:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -2348,11 +2348,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_acq_rel:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -2364,11 +2364,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_seq_cst:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -2387,7 +2387,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
@@ -2408,7 +2408,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
@@ -2429,7 +2429,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
@@ -2450,7 +2450,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
@@ -2471,7 +2471,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
@@ -2485,11 +2485,11 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_monotonic:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2501,11 +2501,11 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_acquire:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2517,11 +2517,11 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_release:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2533,11 +2533,11 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2549,11 +2549,11 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2821,11 +2821,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2839,11 +2839,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2857,11 +2857,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2875,11 +2875,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2893,11 +2893,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2911,11 +2911,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -2929,11 +2929,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -2947,11 +2947,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -2965,11 +2965,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -2983,11 +2983,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -3001,11 +3001,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -3019,11 +3019,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -3037,11 +3037,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -3055,11 +3055,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -3073,11 +3073,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -3091,11 +3091,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -3109,11 +3109,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -3127,11 +3127,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -3145,11 +3145,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -3163,11 +3163,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -3189,7 +3189,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
@@ -3214,7 +3214,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
@@ -3239,7 +3239,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
@@ -3264,7 +3264,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
@@ -3289,7 +3289,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
@@ -3306,11 +3306,11 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -3324,11 +3324,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -3342,11 +3342,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -3360,11 +3360,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -3378,11 +3378,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3700,11 +3700,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_or_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_monotonic:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -3716,11 +3716,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_acquire:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -3732,11 +3732,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_release:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -3748,11 +3748,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_acq_rel:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -3764,11 +3764,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_seq_cst:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3780,11 +3780,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_monotonic:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -3796,11 +3796,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_acquire:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -3812,11 +3812,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_release:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -3828,11 +3828,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_acq_rel:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -3844,11 +3844,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_seq_cst:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -3860,11 +3860,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_monotonic:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -3876,11 +3876,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_acquire:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -3892,11 +3892,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_release:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -3908,11 +3908,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_acq_rel:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -3924,11 +3924,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_seq_cst:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -3940,11 +3940,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_monotonic:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -3956,11 +3956,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_acquire:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -3972,11 +3972,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_release:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -3988,11 +3988,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_acq_rel:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4004,11 +4004,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_seq_cst:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4027,7 +4027,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
@@ -4048,7 +4048,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
@@ -4069,7 +4069,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
@@ -4090,7 +4090,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
@@ -4111,7 +4111,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
@@ -4125,11 +4125,11 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_monotonic:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4141,11 +4141,11 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_acquire:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4157,11 +4157,11 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_release:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4173,11 +4173,11 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_acq_rel:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4189,11 +4189,11 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_seq_cst:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -4460,11 +4460,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_monotonic:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4476,11 +4476,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_acquire:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4492,11 +4492,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_release:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4508,11 +4508,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_acq_rel:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4524,11 +4524,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_seq_cst:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -4540,11 +4540,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_monotonic:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -4556,11 +4556,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_acquire:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -4572,11 +4572,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_release:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -4588,11 +4588,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_acq_rel:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -4604,11 +4604,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_seq_cst:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -4620,11 +4620,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_monotonic:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -4636,11 +4636,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_acquire:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -4652,11 +4652,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_release:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -4668,11 +4668,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_acq_rel:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -4684,11 +4684,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_seq_cst:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -4700,11 +4700,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_monotonic:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -4716,11 +4716,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_acquire:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -4732,11 +4732,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_release:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -4748,11 +4748,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_acq_rel:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4764,11 +4764,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_seq_cst:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4787,7 +4787,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
@@ -4808,7 +4808,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
@@ -4829,7 +4829,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
@@ -4850,7 +4850,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
@@ -4871,7 +4871,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
@@ -4885,11 +4885,11 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_monotonic:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4901,11 +4901,11 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_acquire:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4917,11 +4917,11 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_release:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4933,11 +4933,11 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_acq_rel:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4949,11 +4949,11 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_seq_cst:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -5220,13 +5220,13 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5240,13 +5240,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5260,13 +5260,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_release:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5280,13 +5280,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5300,13 +5300,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -5320,13 +5320,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -5340,13 +5340,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -5360,13 +5360,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_release:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -5380,13 +5380,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -5400,13 +5400,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -5420,12 +5420,12 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -5438,12 +5438,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -5456,12 +5456,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_release:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -5474,12 +5474,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -5492,12 +5492,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -5510,12 +5510,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -5528,12 +5528,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -5546,12 +5546,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_release:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -5564,12 +5564,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -5582,12 +5582,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -5600,7 +5600,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldxp x10, x12, [x9]
@@ -5608,7 +5608,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
@@ -5623,7 +5623,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldaxp x10, x12, [x9]
@@ -5631,7 +5631,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
@@ -5646,7 +5646,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldxp x10, x12, [x9]
@@ -5654,7 +5654,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
@@ -5669,7 +5669,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldaxp x10, x12, [x9]
@@ -5677,7 +5677,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
@@ -5692,7 +5692,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldaxp x10, x12, [x9]
@@ -5700,7 +5700,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
@@ -5715,13 +5715,13 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5735,13 +5735,13 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5755,13 +5755,13 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5775,13 +5775,13 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5795,13 +5795,13 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -5816,7 +5816,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5832,7 +5832,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5848,7 +5848,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5864,7 +5864,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5880,7 +5880,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5895,7 +5895,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5909,7 +5909,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5923,7 +5923,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5937,7 +5937,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5951,7 +5951,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5965,7 +5965,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5979,7 +5979,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5993,7 +5993,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6007,7 +6007,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6021,7 +6021,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6035,7 +6035,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6052,7 +6052,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6069,7 +6069,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6086,7 +6086,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6103,7 +6103,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6120,13 +6120,13 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -6140,13 +6140,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -6160,13 +6160,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -6180,13 +6180,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -6200,13 +6200,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6220,13 +6220,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -6240,13 +6240,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -6260,13 +6260,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -6280,13 +6280,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -6300,13 +6300,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -6320,12 +6320,12 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -6338,12 +6338,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -6356,12 +6356,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -6374,12 +6374,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -6392,12 +6392,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -6410,12 +6410,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -6428,12 +6428,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -6446,12 +6446,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -6464,12 +6464,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -6482,12 +6482,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -6500,7 +6500,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldxp x10, x12, [x9]
@@ -6508,7 +6508,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
@@ -6523,7 +6523,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldaxp x10, x12, [x9]
@@ -6531,7 +6531,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
@@ -6546,7 +6546,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldxp x10, x12, [x9]
@@ -6554,7 +6554,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
@@ -6569,7 +6569,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldaxp x10, x12, [x9]
@@ -6577,7 +6577,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
@@ -6592,7 +6592,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldaxp x10, x12, [x9]
@@ -6600,7 +6600,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
@@ -6615,13 +6615,13 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -6635,13 +6635,13 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -6655,13 +6655,13 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -6675,13 +6675,13 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -6695,13 +6695,13 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6716,7 +6716,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6732,7 +6732,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6748,7 +6748,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6764,7 +6764,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6780,7 +6780,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6795,7 +6795,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6809,7 +6809,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6823,7 +6823,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6837,7 +6837,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6851,7 +6851,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6865,7 +6865,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6879,7 +6879,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6893,7 +6893,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6907,7 +6907,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6921,7 +6921,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6935,7 +6935,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -6952,7 +6952,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -6969,7 +6969,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -6986,7 +6986,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -7003,7 +7003,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -7020,13 +7020,13 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7040,13 +7040,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7060,13 +7060,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7080,13 +7080,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7100,13 +7100,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7120,13 +7120,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -7140,13 +7140,13 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -7160,13 +7160,13 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -7180,13 +7180,13 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -7200,13 +7200,13 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -7220,12 +7220,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -7238,12 +7238,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -7256,12 +7256,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -7274,12 +7274,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -7292,12 +7292,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -7310,12 +7310,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -7328,12 +7328,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -7346,12 +7346,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -7364,12 +7364,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -7382,12 +7382,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -7400,7 +7400,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldxp x10, x12, [x9]
@@ -7408,7 +7408,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
@@ -7423,7 +7423,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldaxp x10, x12, [x9]
@@ -7431,7 +7431,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
@@ -7446,7 +7446,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldxp x10, x12, [x9]
@@ -7454,7 +7454,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
@@ -7469,7 +7469,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldaxp x10, x12, [x9]
@@ -7477,7 +7477,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
@@ -7492,7 +7492,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldaxp x10, x12, [x9]
@@ -7500,7 +7500,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
@@ -7515,13 +7515,13 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7535,13 +7535,13 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7555,13 +7555,13 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7575,13 +7575,13 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7595,13 +7595,13 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7616,7 +7616,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7632,7 +7632,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7648,7 +7648,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7664,7 +7664,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7680,7 +7680,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7695,7 +7695,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7709,7 +7709,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7723,7 +7723,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7737,7 +7737,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7751,7 +7751,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7765,7 +7765,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7779,7 +7779,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7793,7 +7793,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7807,7 +7807,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7821,7 +7821,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7835,7 +7835,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7852,7 +7852,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7869,7 +7869,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7886,7 +7886,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7903,7 +7903,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7920,13 +7920,13 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7940,13 +7940,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7960,13 +7960,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7980,13 +7980,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -8000,13 +8000,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -8020,13 +8020,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -8040,13 +8040,13 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -8060,13 +8060,13 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -8080,13 +8080,13 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -8100,13 +8100,13 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -8120,12 +8120,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -8138,12 +8138,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -8156,12 +8156,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -8174,12 +8174,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -8192,12 +8192,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -8210,12 +8210,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -8228,12 +8228,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -8246,12 +8246,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -8264,12 +8264,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -8282,12 +8282,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -8300,7 +8300,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldxp x10, x12, [x9]
@@ -8308,7 +8308,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
@@ -8323,7 +8323,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldaxp x10, x12, [x9]
@@ -8331,7 +8331,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
@@ -8346,7 +8346,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldxp x10, x12, [x9]
@@ -8354,7 +8354,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
@@ -8369,7 +8369,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldaxp x10, x12, [x9]
@@ -8377,7 +8377,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
@@ -8392,7 +8392,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldaxp x10, x12, [x9]
@@ -8400,7 +8400,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
@@ -8415,13 +8415,13 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -8435,13 +8435,13 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -8455,13 +8455,13 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -8475,13 +8475,13 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -8495,13 +8495,13 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -8516,7 +8516,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8532,7 +8532,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8548,7 +8548,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8564,7 +8564,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8580,7 +8580,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8595,7 +8595,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8609,7 +8609,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8623,7 +8623,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8637,7 +8637,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8651,7 +8651,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8665,7 +8665,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8679,7 +8679,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8693,7 +8693,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8707,7 +8707,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8721,7 +8721,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8735,7 +8735,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -8752,7 +8752,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -8769,7 +8769,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -8786,7 +8786,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -8803,7 +8803,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
index 29663f2633852..f219e712c2ae2 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
@@ -512,7 +512,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0:    adds x9, x10, x9
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
@@ -529,7 +529,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0:    adds x9, x10, x9
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
@@ -546,7 +546,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0:    adds x9, x10, x9
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
@@ -563,7 +563,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0:    adds x9, x10, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
@@ -580,7 +580,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0:    adds x9, x10, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
@@ -1097,7 +1097,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O0:    subs x9, x10, x9
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
@@ -1114,7 +1114,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O0:    subs x9, x10, x9
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
@@ -1131,7 +1131,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O0:    subs x9, x10, x9
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
@@ -1148,7 +1148,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0:    subs x9, x10, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
@@ -1165,7 +1165,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0:    subs x9, x10, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
@@ -2008,7 +2008,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casb w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O1:    and w10, w8, w1
@@ -2024,7 +2024,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casab w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O1:    and w10, w8, w1
@@ -2040,7 +2040,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    caslb w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O1:    and w10, w8, w1
@@ -2056,7 +2056,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casalb w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O1:    and w10, w8, w1
@@ -2072,7 +2072,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casalb w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O1:    and w10, w8, w1
@@ -2088,7 +2088,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value)
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    cash w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O1:    and w10, w8, w1
@@ -2104,7 +2104,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casah w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O1:    and w10, w8, w1
@@ -2120,7 +2120,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    caslh w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O1:    and w10, w8, w1
@@ -2136,7 +2136,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casalh w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O1:    and w10, w8, w1
@@ -2152,7 +2152,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casalh w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O1:    and w10, w8, w1
@@ -2168,7 +2168,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value)
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    cas w8, w10, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O1:    and w10, w8, w1
@@ -2184,7 +2184,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casa w8, w10, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O1:    and w10, w8, w1
@@ -2200,7 +2200,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casl w8, w10, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O1:    and w10, w8, w1
@@ -2216,7 +2216,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casal w8, w10, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O1:    and w10, w8, w1
@@ -2232,7 +2232,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casal w8, w10, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O1:    and w10, w8, w1
@@ -2248,7 +2248,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value)
 ; -O0:    and x8, x9, x8
 ; -O0:    mvn x10, x8
 ; -O0:    cas x8, x10, [x11]
-; -O0:    subs x9, x8, x9
+; -O0:    cmp x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O1:    and x10, x8, x1
@@ -2264,7 +2264,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0:    and x8, x9, x8
 ; -O0:    mvn x10, x8
 ; -O0:    casa x8, x10, [x11]
-; -O0:    subs x9, x8, x9
+; -O0:    cmp x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O1:    and x10, x8, x1
@@ -2280,7 +2280,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0:    and x8, x9, x8
 ; -O0:    mvn x10, x8
 ; -O0:    casl x8, x10, [x11]
-; -O0:    subs x9, x8, x9
+; -O0:    cmp x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O1:    and x10, x8, x1
@@ -2296,7 +2296,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0:    and x8, x9, x8
 ; -O0:    mvn x10, x8
 ; -O0:    casal x8, x10, [x11]
-; -O0:    subs x9, x8, x9
+; -O0:    cmp x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O1:    and x10, x8, x1
@@ -2312,7 +2312,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0:    and x8, x9, x8
 ; -O0:    mvn x10, x8
 ; -O0:    casal x8, x10, [x11]
-; -O0:    subs x9, x8, x9
+; -O0:    cmp x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O1:    and x10, x8, x1
@@ -2330,7 +2330,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    mvn x2, x12
 ; -O0:    mvn x9, x9
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
@@ -2353,7 +2353,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    mvn x2, x12
 ; -O0:    mvn x9, x9
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
@@ -2376,7 +2376,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    mvn x2, x12
 ; -O0:    mvn x9, x9
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
@@ -2399,7 +2399,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    mvn x2, x12
 ; -O0:    mvn x9, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
@@ -2422,7 +2422,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    mvn x2, x12
 ; -O0:    mvn x9, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
@@ -2443,7 +2443,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casb w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O1:    and w10, w8, w1
@@ -2459,7 +2459,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casab w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O1:    and w10, w8, w1
@@ -2475,7 +2475,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    caslb w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O1:    and w10, w8, w1
@@ -2491,7 +2491,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casalb w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O1:    and w10, w8, w1
@@ -2507,7 +2507,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casalb w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O1:    and w10, w8, w1
@@ -3448,7 +3448,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x2, x11, x12
 ; -O0:    eor x9, x10, x9
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
@@ -3467,7 +3467,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x2, x11, x12
 ; -O0:    eor x9, x10, x9
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
@@ -3486,7 +3486,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x2, x11, x12
 ; -O0:    eor x9, x10, x9
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
@@ -3505,7 +3505,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x2, x11, x12
 ; -O0:    eor x9, x10, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
@@ -3524,7 +3524,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x2, x11, x12
 ; -O0:    eor x9, x10, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
@@ -3970,11 +3970,11 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    csel x2, x11, x12, lt
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
@@ -3991,11 +3991,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    csel x2, x11, x12, lt
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
@@ -4012,11 +4012,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    csel x2, x11, x12, lt
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
@@ -4033,11 +4033,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    csel x2, x11, x12, lt
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
@@ -4054,11 +4054,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    csel x2, x11, x12, lt
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
@@ -4111,7 +4111,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4127,7 +4127,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4143,7 +4143,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4159,7 +4159,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4175,7 +4175,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4190,7 +4190,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4204,7 +4204,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4218,7 +4218,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4232,7 +4232,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4246,7 +4246,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4260,7 +4260,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4274,7 +4274,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4288,7 +4288,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4302,7 +4302,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4316,7 +4316,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4330,7 +4330,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -4347,7 +4347,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -4364,7 +4364,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -4381,7 +4381,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -4398,7 +4398,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -4555,11 +4555,11 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    csel x2, x11, x12, ge
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
@@ -4576,11 +4576,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    csel x2, x11, x12, ge
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
@@ -4597,11 +4597,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    csel x2, x11, x12, ge
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
@@ -4618,11 +4618,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    csel x2, x11, x12, ge
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
@@ -4639,11 +4639,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    csel x2, x11, x12, ge
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
@@ -4696,7 +4696,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4712,7 +4712,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4728,7 +4728,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4744,7 +4744,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4760,7 +4760,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4775,7 +4775,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4789,7 +4789,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4803,7 +4803,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4817,7 +4817,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4831,7 +4831,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4845,7 +4845,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4859,7 +4859,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4873,7 +4873,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4887,7 +4887,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4901,7 +4901,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4915,7 +4915,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -4932,7 +4932,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -4949,7 +4949,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -4966,7 +4966,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -4983,7 +4983,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -5140,11 +5140,11 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    csel x2, x11, x12, lo
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
@@ -5161,11 +5161,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    csel x2, x11, x12, lo
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
@@ -5182,11 +5182,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    csel x2, x11, x12, lo
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
@@ -5203,11 +5203,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    csel x2, x11, x12, lo
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
@@ -5224,11 +5224,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    csel x2, x11, x12, lo
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
@@ -5281,7 +5281,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5297,7 +5297,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5313,7 +5313,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5329,7 +5329,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5345,7 +5345,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5360,7 +5360,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5374,7 +5374,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5388,7 +5388,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5402,7 +5402,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5416,7 +5416,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5430,7 +5430,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5444,7 +5444,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5458,7 +5458,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5472,7 +5472,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5486,7 +5486,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5500,7 +5500,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -5517,7 +5517,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -5534,7 +5534,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -5551,7 +5551,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -5568,7 +5568,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -5725,11 +5725,11 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    csel x2, x11, x12, hs
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
@@ -5746,11 +5746,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    csel x2, x11, x12, hs
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
@@ -5767,11 +5767,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    csel x2, x11, x12, hs
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
@@ -5788,11 +5788,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    csel x2, x11, x12, hs
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
@@ -5809,11 +5809,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    csel x2, x11, x12, hs
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
@@ -5866,7 +5866,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5882,7 +5882,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5898,7 +5898,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5914,7 +5914,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5930,7 +5930,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5945,7 +5945,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5959,7 +5959,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5973,7 +5973,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5987,7 +5987,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6001,7 +6001,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6015,7 +6015,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6029,7 +6029,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6043,7 +6043,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6057,7 +6057,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6071,7 +6071,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6085,7 +6085,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -6102,7 +6102,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -6119,7 +6119,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -6136,7 +6136,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -6153,7 +6153,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll
index 9f0c6b07d7b7c..2a2fa71372a43 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lsfe.ll
@@ -379,10 +379,10 @@ define dso_local double @atomicrmw_fadd_double_unaligned_seq_cst(ptr %ptr, doubl
 
 define dso_local half @atomicrmw_fsub_half_aligned_monotonic(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_monotonic:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -393,10 +393,10 @@ define dso_local half @atomicrmw_fsub_half_aligned_monotonic(ptr %ptr, half %val
 
 define dso_local half @atomicrmw_fsub_half_aligned_acquire(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_acquire:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -407,10 +407,10 @@ define dso_local half @atomicrmw_fsub_half_aligned_acquire(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fsub_half_aligned_release(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_release:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -421,10 +421,10 @@ define dso_local half @atomicrmw_fsub_half_aligned_release(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fsub_half_aligned_acq_rel(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_acq_rel:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -435,10 +435,10 @@ define dso_local half @atomicrmw_fsub_half_aligned_acq_rel(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fsub_half_aligned_seq_cst(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_seq_cst:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -451,10 +451,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_monotonic(ptr %ptr, bfloa
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_monotonic:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -469,10 +469,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_acquire(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_acquire:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -487,10 +487,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_release(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_release:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -505,10 +505,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_acq_rel(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_acq_rel:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -523,10 +523,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_seq_cst:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -539,10 +539,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 
 define dso_local float @atomicrmw_fsub_float_aligned_monotonic(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_monotonic:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -553,10 +553,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_monotonic(ptr %ptr, float %
 
 define dso_local float @atomicrmw_fsub_float_aligned_acquire(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_acquire:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -567,10 +567,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_acquire(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fsub_float_aligned_release(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_release:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -581,10 +581,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_release(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fsub_float_aligned_acq_rel(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_acq_rel:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -595,10 +595,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_acq_rel(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fsub_float_aligned_seq_cst(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_seq_cst:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -609,10 +609,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_seq_cst(ptr %ptr, float %va
 
 define dso_local double @atomicrmw_fsub_double_aligned_monotonic(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_monotonic:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_monotonic:
 ; -O1:    ldxr x8, [x0]
@@ -623,10 +623,10 @@ define dso_local double @atomicrmw_fsub_double_aligned_monotonic(ptr %ptr, doubl
 
 define dso_local double @atomicrmw_fsub_double_aligned_acquire(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_acquire:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_acquire:
 ; -O1:    ldaxr x8, [x0]
@@ -637,10 +637,10 @@ define dso_local double @atomicrmw_fsub_double_aligned_acquire(ptr %ptr, double
 
 define dso_local double @atomicrmw_fsub_double_aligned_release(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_release:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_release:
 ; -O1:    ldxr x8, [x0]
@@ -651,10 +651,10 @@ define dso_local double @atomicrmw_fsub_double_aligned_release(ptr %ptr, double
 
 define dso_local double @atomicrmw_fsub_double_aligned_acq_rel(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_acq_rel:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_acq_rel:
 ; -O1:    ldaxr x8, [x0]
@@ -665,10 +665,10 @@ define dso_local double @atomicrmw_fsub_double_aligned_acq_rel(ptr %ptr, double
 
 define dso_local double @atomicrmw_fsub_double_aligned_seq_cst(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_seq_cst:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_seq_cst:
 ; -O1:    ldaxr x8, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-outline_atomics.ll
index 45737a1ae6be4..c022c3a0769e7 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-outline_atomics.ll
@@ -146,7 +146,7 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
 ; -O0:    bl __aarch64_cas16_relax
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
@@ -159,7 +159,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %val
 define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i128_aligned_acquire:
 ; -O0:    bl __aarch64_cas16_acq
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire:
@@ -172,7 +172,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i128_aligned_release:
 ; -O0:    bl __aarch64_cas16_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_release:
@@ -185,7 +185,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
@@ -198,7 +198,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
@@ -527,7 +527,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0:    adds x3, x1, x9
 ; -O0:    bl __aarch64_cas16_relax
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
@@ -542,7 +542,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0:    adds x3, x1, x9
 ; -O0:    bl __aarch64_cas16_acq
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
@@ -557,7 +557,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0:    adds x3, x1, x9
 ; -O0:    bl __aarch64_cas16_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
@@ -572,7 +572,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0:    adds x3, x1, x9
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
@@ -587,7 +587,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0:    adds x3, x1, x9
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
@@ -1102,7 +1102,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O0:    subs x3, x1, x9
 ; -O0:    bl __aarch64_cas16_relax
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
@@ -1117,7 +1117,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O0:    subs x3, x1, x9
 ; -O0:    bl __aarch64_cas16_acq
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
@@ -1132,7 +1132,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O0:    subs x3, x1, x9
 ; -O0:    bl __aarch64_cas16_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
@@ -1147,7 +1147,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0:    subs x3, x1, x9
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
@@ -1162,7 +1162,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0:    subs x3, x1, x9
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
@@ -1718,7 +1718,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    and x2, x0, x9
 ; -O0:    and x3, x1, x8
 ; -O0:    bl __aarch64_cas16_relax
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
@@ -1735,7 +1735,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    and x2, x0, x9
 ; -O0:    and x3, x1, x8
 ; -O0:    bl __aarch64_cas16_acq
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
@@ -1752,7 +1752,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    and x2, x0, x9
 ; -O0:    and x3, x1, x8
 ; -O0:    bl __aarch64_cas16_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
@@ -1769,7 +1769,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    and x2, x0, x9
 ; -O0:    and x3, x1, x8
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
@@ -1786,7 +1786,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    and x2, x0, x9
 ; -O0:    and x3, x1, x8
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
@@ -2118,7 +2118,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_relax
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2134,7 +2134,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_acq
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2150,7 +2150,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2166,7 +2166,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2182,7 +2182,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2198,7 +2198,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value)
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas2_relax
-; -O0:    subs w8, w0, w8, uxth
+; -O0:    cmp w0, w8, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -2214,7 +2214,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas2_acq
-; -O0:    subs w8, w0, w8, uxth
+; -O0:    cmp w0, w8, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -2230,7 +2230,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas2_rel
-; -O0:    subs w8, w0, w8, uxth
+; -O0:    cmp w0, w8, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -2246,7 +2246,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w0, w8, uxth
+; -O0:    cmp w0, w8, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -2262,7 +2262,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w0, w8, uxth
+; -O0:    cmp w0, w8, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -2278,7 +2278,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value)
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas4_relax
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -2294,7 +2294,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas4_acq
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -2310,7 +2310,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas4_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -2326,7 +2326,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -2342,7 +2342,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -2358,7 +2358,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value)
 ; -O0:    and x8, x0, x8
 ; -O0:    mvn x1, x8
 ; -O0:    bl __aarch64_cas8_relax
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -2374,7 +2374,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0:    and x8, x0, x8
 ; -O0:    mvn x1, x8
 ; -O0:    bl __aarch64_cas8_acq
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -2390,7 +2390,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0:    and x8, x0, x8
 ; -O0:    mvn x1, x8
 ; -O0:    bl __aarch64_cas8_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -2406,7 +2406,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0:    and x8, x0, x8
 ; -O0:    mvn x1, x8
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -2422,7 +2422,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0:    and x8, x0, x8
 ; -O0:    mvn x1, x8
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -2440,7 +2440,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    mvn x2, x9
 ; -O0:    mvn x3, x8
 ; -O0:    bl __aarch64_cas16_relax
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
@@ -2461,7 +2461,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    mvn x2, x9
 ; -O0:    mvn x3, x8
 ; -O0:    bl __aarch64_cas16_acq
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
@@ -2482,7 +2482,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    mvn x2, x9
 ; -O0:    mvn x3, x8
 ; -O0:    bl __aarch64_cas16_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
@@ -2503,7 +2503,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    mvn x2, x9
 ; -O0:    mvn x3, x8
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
@@ -2524,7 +2524,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    mvn x2, x9
 ; -O0:    mvn x3, x8
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
@@ -2543,7 +2543,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_relax
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2559,7 +2559,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_acq
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2575,7 +2575,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2591,7 +2591,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2607,7 +2607,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w0, w8
 ; -O0:    mvn w1, w8
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3068,7 +3068,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
 ; -O0:    orr x2, x0, x9
 ; -O0:    orr x3, x1, x8
 ; -O0:    bl __aarch64_cas16_relax
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
@@ -3085,7 +3085,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    orr x2, x0, x9
 ; -O0:    orr x3, x1, x8
 ; -O0:    bl __aarch64_cas16_acq
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
@@ -3102,7 +3102,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    orr x2, x0, x9
 ; -O0:    orr x3, x1, x8
 ; -O0:    bl __aarch64_cas16_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
@@ -3119,7 +3119,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    orr x2, x0, x9
 ; -O0:    orr x3, x1, x8
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
@@ -3136,7 +3136,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    orr x2, x0, x9
 ; -O0:    orr x3, x1, x8
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
@@ -3583,7 +3583,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x2, x0, x9
 ; -O0:    eor x3, x1, x8
 ; -O0:    bl __aarch64_cas16_relax
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
@@ -3600,7 +3600,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x2, x0, x9
 ; -O0:    eor x3, x1, x8
 ; -O0:    bl __aarch64_cas16_acq
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
@@ -3617,7 +3617,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x2, x0, x9
 ; -O0:    eor x3, x1, x8
 ; -O0:    bl __aarch64_cas16_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
@@ -3634,7 +3634,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x2, x0, x9
 ; -O0:    eor x3, x1, x8
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
@@ -3651,7 +3651,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x2, x0, x9
 ; -O0:    eor x3, x1, x8
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
@@ -3956,10 +3956,10 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_relax
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -3974,10 +3974,10 @@ define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_acq
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -3992,10 +3992,10 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_release:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -4010,10 +4010,10 @@ define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -4028,10 +4028,10 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -4046,10 +4046,10 @@ define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas2_relax
-; -O0:    subs w8, w0, w8, uxth
+; -O0:    cmp w0, w8, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -4064,10 +4064,10 @@ define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas2_acq
-; -O0:    subs w8, w0, w8, uxth
+; -O0:    cmp w0, w8, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -4082,10 +4082,10 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_release:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas2_rel
-; -O0:    subs w8, w0, w8, uxth
+; -O0:    cmp w0, w8, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -4100,10 +4100,10 @@ define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w0, w8, uxth
+; -O0:    cmp w0, w8, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -4118,10 +4118,10 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w0, w8, uxth
+; -O0:    cmp w0, w8, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -4135,10 +4135,10 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas4_relax
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -4151,10 +4151,10 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas4_acq
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -4167,10 +4167,10 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_release:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas4_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -4183,10 +4183,10 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -4199,10 +4199,10 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -4215,10 +4215,10 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, gt
 ; -O0:    bl __aarch64_cas8_relax
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -4231,10 +4231,10 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, gt
 ; -O0:    bl __aarch64_cas8_acq
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -4247,10 +4247,10 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_release:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, gt
 ; -O0:    bl __aarch64_cas8_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -4263,10 +4263,10 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, gt
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4279,10 +4279,10 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, gt
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4295,11 +4295,11 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, lt
 ; -O0:    csel x3, x1, x8, lt
 ; -O0:    bl __aarch64_cas16_relax
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
@@ -4314,11 +4314,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, lt
 ; -O0:    csel x3, x1, x8, lt
 ; -O0:    bl __aarch64_cas16_acq
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
@@ -4333,11 +4333,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, lt
 ; -O0:    csel x3, x1, x8, lt
 ; -O0:    bl __aarch64_cas16_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
@@ -4352,11 +4352,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, lt
 ; -O0:    csel x3, x1, x8, lt
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
@@ -4371,11 +4371,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, lt
 ; -O0:    csel x3, x1, x8, lt
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
@@ -4391,10 +4391,10 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_relax
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -4409,10 +4409,10 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_acq
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -4427,10 +4427,10 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -4445,10 +4445,10 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -4463,10 +4463,10 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, gt
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -4481,7 +4481,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4497,7 +4497,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4513,7 +4513,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4529,7 +4529,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4545,7 +4545,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4560,7 +4560,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4574,7 +4574,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4588,7 +4588,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4602,7 +4602,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4616,7 +4616,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4630,7 +4630,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4644,7 +4644,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4658,7 +4658,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4672,7 +4672,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4686,7 +4686,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4700,7 +4700,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -4717,7 +4717,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -4734,7 +4734,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -4751,7 +4751,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -4768,7 +4768,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -4786,10 +4786,10 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_relax
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -4804,10 +4804,10 @@ define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_acq
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -4822,10 +4822,10 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -4840,10 +4840,10 @@ define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -4858,10 +4858,10 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -4876,10 +4876,10 @@ define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas2_relax
-; -O0:    subs w8, w0, w8, uxth
+; -O0:    cmp w0, w8, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -4894,10 +4894,10 @@ define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas2_acq
-; -O0:    subs w8, w0, w8, uxth
+; -O0:    cmp w0, w8, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -4912,10 +4912,10 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas2_rel
-; -O0:    subs w8, w0, w8, uxth
+; -O0:    cmp w0, w8, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -4930,10 +4930,10 @@ define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w0, w8, uxth
+; -O0:    cmp w0, w8, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -4948,10 +4948,10 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O0:    sxth w9, w0
-; -O0:    subs w9, w9, w8, sxth
+; -O0:    cmp w9, w8, sxth
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w0, w8, uxth
+; -O0:    cmp w0, w8, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -4965,10 +4965,10 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas4_relax
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -4981,10 +4981,10 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas4_acq
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -4997,10 +4997,10 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas4_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -5013,10 +5013,10 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -5029,10 +5029,10 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -5045,10 +5045,10 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, le
 ; -O0:    bl __aarch64_cas8_relax
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -5061,10 +5061,10 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, le
 ; -O0:    bl __aarch64_cas8_acq
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -5077,10 +5077,10 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, le
 ; -O0:    bl __aarch64_cas8_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -5093,10 +5093,10 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, le
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -5109,10 +5109,10 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, le
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -5125,11 +5125,11 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, ge
 ; -O0:    csel x3, x1, x8, ge
 ; -O0:    bl __aarch64_cas16_relax
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
@@ -5144,11 +5144,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, ge
 ; -O0:    csel x3, x1, x8, ge
 ; -O0:    bl __aarch64_cas16_acq
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
@@ -5163,11 +5163,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, ge
 ; -O0:    csel x3, x1, x8, ge
 ; -O0:    bl __aarch64_cas16_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
@@ -5182,11 +5182,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, ge
 ; -O0:    csel x3, x1, x8, ge
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
@@ -5201,11 +5201,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, ge
 ; -O0:    csel x3, x1, x8, ge
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
@@ -5221,10 +5221,10 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_relax
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5239,10 +5239,10 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_acq
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5257,10 +5257,10 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5275,10 +5275,10 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5293,10 +5293,10 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O0:    sxtb w9, w0
-; -O0:    subs w9, w9, w8, sxtb
+; -O0:    cmp w9, w8, sxtb
 ; -O0:    csel w1, w0, w8, le
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8, uxtb
+; -O0:    cmp w0, w8, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -5311,7 +5311,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5327,7 +5327,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5343,7 +5343,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5359,7 +5359,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5375,7 +5375,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5390,7 +5390,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5404,7 +5404,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5418,7 +5418,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5432,7 +5432,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5446,7 +5446,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5460,7 +5460,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5474,7 +5474,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5488,7 +5488,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5502,7 +5502,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5516,7 +5516,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5530,7 +5530,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -5547,7 +5547,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -5564,7 +5564,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -5581,7 +5581,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -5598,7 +5598,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -5616,10 +5616,10 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_relax
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -5634,10 +5634,10 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_acq
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -5652,10 +5652,10 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -5670,10 +5670,10 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -5688,10 +5688,10 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -5706,10 +5706,10 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O0:    and w9, w0, #0xffff
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas2_relax
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -5724,10 +5724,10 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O0:    and w9, w0, #0xffff
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas2_acq
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -5742,10 +5742,10 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O0:    and w9, w0, #0xffff
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas2_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -5760,10 +5760,10 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O0:    and w9, w0, #0xffff
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -5778,10 +5778,10 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O0:    and w9, w0, #0xffff
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -5795,10 +5795,10 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas4_relax
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -5811,10 +5811,10 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas4_acq
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -5827,10 +5827,10 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas4_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -5843,10 +5843,10 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -5859,10 +5859,10 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -5875,10 +5875,10 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, hi
 ; -O0:    bl __aarch64_cas8_relax
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -5891,10 +5891,10 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, hi
 ; -O0:    bl __aarch64_cas8_acq
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -5907,10 +5907,10 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, hi
 ; -O0:    bl __aarch64_cas8_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -5923,10 +5923,10 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, hi
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -5939,10 +5939,10 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, hi
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -5955,11 +5955,11 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, lo
 ; -O0:    csel x3, x1, x8, lo
 ; -O0:    bl __aarch64_cas16_relax
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
@@ -5974,11 +5974,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, lo
 ; -O0:    csel x3, x1, x8, lo
 ; -O0:    bl __aarch64_cas16_acq
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
@@ -5993,11 +5993,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, lo
 ; -O0:    csel x3, x1, x8, lo
 ; -O0:    bl __aarch64_cas16_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
@@ -6012,11 +6012,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, lo
 ; -O0:    csel x3, x1, x8, lo
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
@@ -6031,11 +6031,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, lo
 ; -O0:    csel x3, x1, x8, lo
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
@@ -6051,10 +6051,10 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_relax
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -6069,10 +6069,10 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_acq
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -6087,10 +6087,10 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -6105,10 +6105,10 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -6123,10 +6123,10 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, hi
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -6141,7 +6141,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6157,7 +6157,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6173,7 +6173,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6189,7 +6189,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6205,7 +6205,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6220,7 +6220,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6234,7 +6234,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6248,7 +6248,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6262,7 +6262,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6276,7 +6276,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6290,7 +6290,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6304,7 +6304,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6318,7 +6318,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6332,7 +6332,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6346,7 +6346,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6360,7 +6360,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -6377,7 +6377,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -6394,7 +6394,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -6411,7 +6411,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -6428,7 +6428,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -6446,10 +6446,10 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_relax
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -6464,10 +6464,10 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_acq
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -6482,10 +6482,10 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -6500,10 +6500,10 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -6518,10 +6518,10 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -6536,10 +6536,10 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O0:    and w9, w0, #0xffff
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas2_relax
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -6554,10 +6554,10 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O0:    and w9, w0, #0xffff
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas2_acq
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -6572,10 +6572,10 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O0:    and w9, w0, #0xffff
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas2_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -6590,10 +6590,10 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O0:    and w9, w0, #0xffff
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -6608,10 +6608,10 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O0:    and w9, w0, #0xffff
-; -O0:    subs w9, w9, w8, uxth
+; -O0:    cmp w9, w8, uxth
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas2_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -6625,10 +6625,10 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas4_relax
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -6641,10 +6641,10 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas4_acq
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -6657,10 +6657,10 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas4_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -6673,10 +6673,10 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -6689,10 +6689,10 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
-; -O0:    subs w9, w0, w8
+; -O0:    cmp w0, w8
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas4_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -6705,10 +6705,10 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, ls
 ; -O0:    bl __aarch64_cas8_relax
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -6721,10 +6721,10 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, ls
 ; -O0:    bl __aarch64_cas8_acq
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -6737,10 +6737,10 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, ls
 ; -O0:    bl __aarch64_cas8_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -6753,10 +6753,10 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, ls
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -6769,10 +6769,10 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
-; -O0:    subs x9, x0, x8
+; -O0:    cmp x0, x8
 ; -O0:    csel x1, x0, x8, ls
 ; -O0:    bl __aarch64_cas8_acq_rel
-; -O0:    subs x8, x0, x8
+; -O0:    cmp x0, x8
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -6785,11 +6785,11 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, hs
 ; -O0:    csel x3, x1, x8, hs
 ; -O0:    bl __aarch64_cas16_relax
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
@@ -6804,11 +6804,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, hs
 ; -O0:    csel x3, x1, x8, hs
 ; -O0:    bl __aarch64_cas16_acq
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
@@ -6823,11 +6823,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, hs
 ; -O0:    csel x3, x1, x8, hs
 ; -O0:    bl __aarch64_cas16_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
@@ -6842,11 +6842,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, hs
 ; -O0:    csel x3, x1, x8, hs
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
@@ -6861,11 +6861,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
-; -O0:    subs x10, x8, x1
+; -O0:    cmp x8, x1
 ; -O0:    csel x2, x0, x9, hs
 ; -O0:    csel x3, x1, x8, hs
 ; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
+; -O0:    cmp x10, x11
 ; -O0:    ccmp x8, x9, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
@@ -6881,10 +6881,10 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_relax
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -6899,10 +6899,10 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_acq
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -6917,10 +6917,10 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -6935,10 +6935,10 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -6953,10 +6953,10 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O0:    and w9, w0, #0xff
-; -O0:    subs w9, w9, w8, uxtb
+; -O0:    cmp w9, w8, uxtb
 ; -O0:    csel w1, w0, w8, ls
 ; -O0:    bl __aarch64_cas1_acq_rel
-; -O0:    subs w8, w0, w8
+; -O0:    cmp w0, w8
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -6971,7 +6971,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6987,7 +6987,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7003,7 +7003,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7019,7 +7019,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7035,7 +7035,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7050,7 +7050,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7064,7 +7064,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7078,7 +7078,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7092,7 +7092,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7106,7 +7106,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7120,7 +7120,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7134,7 +7134,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7148,7 +7148,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7162,7 +7162,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7176,7 +7176,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7190,7 +7190,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -7207,7 +7207,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -7224,7 +7224,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -7241,7 +7241,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -7258,7 +7258,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll
index 8200e2c2c2b32..8b3d8ee5afc5c 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll
@@ -5,10 +5,10 @@
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
 ; -O1:    ldxrb w0, [x8]
@@ -19,10 +19,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_acquire:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
 ; -O1:    ldaxrb w0, [x8]
@@ -33,10 +33,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_release:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
 ; -O1:    ldxrb w0, [x8]
@@ -47,10 +47,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w0, [x8]
@@ -61,10 +61,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w0, [x8]
@@ -75,10 +75,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
 ; -O1:    ldxrh w0, [x8]
@@ -89,10 +89,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_acquire:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
 ; -O1:    ldaxrh w0, [x8]
@@ -103,10 +103,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_release:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
 ; -O1:    ldxrh w0, [x8]
@@ -117,10 +117,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w0, [x8]
@@ -131,10 +131,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w0, [x8]
@@ -145,10 +145,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_monotonic:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_monotonic:
 ; -O1:    ldxr w0, [x8]
@@ -159,10 +159,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_acquire:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_acquire:
 ; -O1:    ldaxr w0, [x8]
@@ -173,10 +173,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_release:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_release:
 ; -O1:    ldxr w0, [x8]
@@ -187,10 +187,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_acq_rel:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_acq_rel:
 ; -O1:    ldaxr w0, [x8]
@@ -201,10 +201,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_seq_cst:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_seq_cst:
 ; -O1:    ldaxr w0, [x8]
@@ -215,10 +215,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_monotonic:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -229,10 +229,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_acquire:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -243,10 +243,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_release:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -257,10 +257,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_acq_rel:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -271,10 +271,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_seq_cst:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -290,7 +290,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
@@ -307,7 +307,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire:
@@ -324,7 +324,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_release:
@@ -341,7 +341,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
@@ -358,7 +358,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
@@ -370,10 +370,10 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
 ; -O1:    ldxrb w0, [x8]
@@ -384,10 +384,10 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
 ; -O1:    ldaxrb w0, [x8]
@@ -398,10 +398,10 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_release:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
 ; -O1:    ldxrb w0, [x8]
@@ -412,10 +412,10 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w0, [x8]
@@ -426,10 +426,10 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w0, [x8]
@@ -580,11 +580,11 @@ define dso_local i128 @atomicrmw_xchg_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_add_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_monotonic:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -596,11 +596,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_acquire:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -612,11 +612,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_release:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -628,11 +628,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_acq_rel:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -644,11 +644,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_seq_cst:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -660,11 +660,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_monotonic:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -676,11 +676,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_add_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_acquire:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -692,11 +692,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_release:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -708,11 +708,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_acq_rel:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -724,11 +724,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_seq_cst:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -740,11 +740,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_monotonic:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -756,11 +756,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_add_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_acquire:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -772,11 +772,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_release:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -788,11 +788,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_acq_rel:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -804,11 +804,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_seq_cst:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -820,11 +820,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_monotonic:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -836,11 +836,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_add_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_acquire:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -852,11 +852,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_release:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -868,11 +868,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_acq_rel:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -884,11 +884,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_seq_cst:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -906,7 +906,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
@@ -925,7 +925,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
@@ -944,7 +944,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
@@ -963,7 +963,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
@@ -982,7 +982,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
@@ -995,11 +995,11 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_monotonic:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1011,11 +1011,11 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_acquire:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1027,11 +1027,11 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_release:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1043,11 +1043,11 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_acq_rel:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1059,11 +1059,11 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_seq_cst:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -1320,11 +1320,11 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_monotonic:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1336,11 +1336,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_acquire:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1352,11 +1352,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_release:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1368,11 +1368,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_acq_rel:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1384,11 +1384,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_seq_cst:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -1400,11 +1400,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_monotonic:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -1416,11 +1416,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_acquire:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -1432,11 +1432,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_release:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -1448,11 +1448,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_acq_rel:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -1464,11 +1464,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_seq_cst:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -1480,11 +1480,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_monotonic:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -1496,11 +1496,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_acquire:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -1512,11 +1512,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_release:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -1528,11 +1528,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_acq_rel:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -1544,11 +1544,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_seq_cst:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -1560,11 +1560,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_monotonic:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -1576,11 +1576,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_acquire:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -1592,11 +1592,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_release:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -1608,11 +1608,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_acq_rel:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -1624,11 +1624,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_seq_cst:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -1646,7 +1646,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
@@ -1665,7 +1665,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
@@ -1684,7 +1684,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
@@ -1703,7 +1703,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
@@ -1722,7 +1722,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
@@ -1735,11 +1735,11 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_monotonic:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1751,11 +1751,11 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_acquire:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1767,11 +1767,11 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_release:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1783,11 +1783,11 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_acq_rel:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1799,11 +1799,11 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_seq_cst:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2060,11 +2060,11 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_monotonic:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2076,11 +2076,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_acquire:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2092,11 +2092,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_release:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2108,11 +2108,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_acq_rel:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2124,11 +2124,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_seq_cst:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2140,11 +2140,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_monotonic:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -2156,11 +2156,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_acquire:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -2172,11 +2172,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_release:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -2188,11 +2188,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_acq_rel:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -2204,11 +2204,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_seq_cst:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -2220,11 +2220,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_monotonic:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -2236,11 +2236,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_acquire:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -2252,11 +2252,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_release:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -2268,11 +2268,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_acq_rel:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -2284,11 +2284,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_seq_cst:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -2300,11 +2300,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_monotonic:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -2316,11 +2316,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_acquire:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -2332,11 +2332,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_release:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -2348,11 +2348,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_acq_rel:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -2364,11 +2364,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_seq_cst:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -2387,7 +2387,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
@@ -2408,7 +2408,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
@@ -2429,7 +2429,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
@@ -2450,7 +2450,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
@@ -2471,7 +2471,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
@@ -2485,11 +2485,11 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_monotonic:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2501,11 +2501,11 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_acquire:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2517,11 +2517,11 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_release:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2533,11 +2533,11 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2549,11 +2549,11 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2821,11 +2821,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2839,11 +2839,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2857,11 +2857,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2875,11 +2875,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2893,11 +2893,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2911,11 +2911,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -2929,11 +2929,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -2947,11 +2947,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -2965,11 +2965,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -2983,11 +2983,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -3001,11 +3001,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -3019,11 +3019,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -3037,11 +3037,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -3055,11 +3055,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -3073,11 +3073,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -3091,11 +3091,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -3109,11 +3109,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -3127,11 +3127,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -3145,11 +3145,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -3163,11 +3163,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -3189,7 +3189,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
@@ -3214,7 +3214,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
@@ -3239,7 +3239,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
@@ -3264,7 +3264,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
@@ -3289,7 +3289,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
@@ -3306,11 +3306,11 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -3324,11 +3324,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -3342,11 +3342,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -3360,11 +3360,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -3378,11 +3378,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3700,11 +3700,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_or_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_monotonic:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -3716,11 +3716,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_acquire:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -3732,11 +3732,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_release:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -3748,11 +3748,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_acq_rel:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -3764,11 +3764,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_seq_cst:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3780,11 +3780,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_monotonic:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -3796,11 +3796,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_acquire:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -3812,11 +3812,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_release:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -3828,11 +3828,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_acq_rel:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -3844,11 +3844,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_seq_cst:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -3860,11 +3860,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_monotonic:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -3876,11 +3876,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_acquire:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -3892,11 +3892,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_release:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -3908,11 +3908,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_acq_rel:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -3924,11 +3924,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_seq_cst:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -3940,11 +3940,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_monotonic:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -3956,11 +3956,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_acquire:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -3972,11 +3972,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_release:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -3988,11 +3988,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_acq_rel:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4004,11 +4004,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_seq_cst:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4027,7 +4027,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
@@ -4048,7 +4048,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
@@ -4069,7 +4069,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
@@ -4090,7 +4090,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
@@ -4111,7 +4111,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
@@ -4125,11 +4125,11 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_monotonic:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4141,11 +4141,11 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_acquire:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4157,11 +4157,11 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_release:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4173,11 +4173,11 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_acq_rel:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4189,11 +4189,11 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_seq_cst:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -4460,11 +4460,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_monotonic:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4476,11 +4476,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_acquire:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4492,11 +4492,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_release:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4508,11 +4508,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_acq_rel:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4524,11 +4524,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_seq_cst:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -4540,11 +4540,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_monotonic:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -4556,11 +4556,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_acquire:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -4572,11 +4572,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_release:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -4588,11 +4588,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_acq_rel:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -4604,11 +4604,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_seq_cst:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -4620,11 +4620,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_monotonic:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -4636,11 +4636,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_acquire:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -4652,11 +4652,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_release:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -4668,11 +4668,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_acq_rel:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -4684,11 +4684,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_seq_cst:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -4700,11 +4700,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_monotonic:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -4716,11 +4716,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_acquire:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -4732,11 +4732,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_release:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -4748,11 +4748,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_acq_rel:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4764,11 +4764,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_seq_cst:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4787,7 +4787,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
@@ -4808,7 +4808,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
@@ -4829,7 +4829,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
@@ -4850,7 +4850,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
@@ -4871,7 +4871,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
@@ -4885,11 +4885,11 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_monotonic:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4901,11 +4901,11 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_acquire:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4917,11 +4917,11 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_release:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4933,11 +4933,11 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_acq_rel:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4949,11 +4949,11 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_seq_cst:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -5220,13 +5220,13 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5240,13 +5240,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5260,13 +5260,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_release:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5280,13 +5280,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5300,13 +5300,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -5320,13 +5320,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -5340,13 +5340,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -5360,13 +5360,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_release:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -5380,13 +5380,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -5400,13 +5400,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -5420,12 +5420,12 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -5438,12 +5438,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -5456,12 +5456,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_release:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -5474,12 +5474,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -5492,12 +5492,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -5510,12 +5510,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -5528,12 +5528,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -5546,12 +5546,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_release:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -5564,12 +5564,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -5582,12 +5582,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -5600,7 +5600,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldxp x10, x12, [x9]
@@ -5608,7 +5608,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
@@ -5623,7 +5623,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldaxp x10, x12, [x9]
@@ -5631,7 +5631,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
@@ -5646,7 +5646,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldxp x10, x12, [x9]
@@ -5654,7 +5654,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
@@ -5669,7 +5669,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldaxp x10, x12, [x9]
@@ -5677,7 +5677,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
@@ -5692,7 +5692,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldaxp x10, x12, [x9]
@@ -5700,7 +5700,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
@@ -5715,13 +5715,13 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5735,13 +5735,13 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5755,13 +5755,13 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5775,13 +5775,13 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5795,13 +5795,13 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -5816,7 +5816,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5832,7 +5832,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5848,7 +5848,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5864,7 +5864,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5880,7 +5880,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5895,7 +5895,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5909,7 +5909,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5923,7 +5923,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5937,7 +5937,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5951,7 +5951,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5965,7 +5965,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5979,7 +5979,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5993,7 +5993,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6007,7 +6007,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6021,7 +6021,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6035,7 +6035,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6052,7 +6052,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6069,7 +6069,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6086,7 +6086,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6103,7 +6103,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6120,13 +6120,13 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -6140,13 +6140,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -6160,13 +6160,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -6180,13 +6180,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -6200,13 +6200,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6220,13 +6220,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -6240,13 +6240,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -6260,13 +6260,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -6280,13 +6280,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -6300,13 +6300,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -6320,12 +6320,12 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -6338,12 +6338,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -6356,12 +6356,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -6374,12 +6374,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -6392,12 +6392,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -6410,12 +6410,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -6428,12 +6428,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -6446,12 +6446,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -6464,12 +6464,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -6482,12 +6482,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -6500,7 +6500,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldxp x10, x12, [x9]
@@ -6508,7 +6508,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
@@ -6523,7 +6523,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldaxp x10, x12, [x9]
@@ -6531,7 +6531,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
@@ -6546,7 +6546,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldxp x10, x12, [x9]
@@ -6554,7 +6554,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
@@ -6569,7 +6569,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldaxp x10, x12, [x9]
@@ -6577,7 +6577,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
@@ -6592,7 +6592,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldaxp x10, x12, [x9]
@@ -6600,7 +6600,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
@@ -6615,13 +6615,13 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -6635,13 +6635,13 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -6655,13 +6655,13 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -6675,13 +6675,13 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -6695,13 +6695,13 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6716,7 +6716,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6732,7 +6732,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6748,7 +6748,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6764,7 +6764,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6780,7 +6780,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6795,7 +6795,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6809,7 +6809,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6823,7 +6823,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6837,7 +6837,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6851,7 +6851,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6865,7 +6865,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6879,7 +6879,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6893,7 +6893,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6907,7 +6907,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6921,7 +6921,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6935,7 +6935,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -6952,7 +6952,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -6969,7 +6969,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -6986,7 +6986,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -7003,7 +7003,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -7020,13 +7020,13 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7040,13 +7040,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7060,13 +7060,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7080,13 +7080,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7100,13 +7100,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7120,13 +7120,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -7140,13 +7140,13 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -7160,13 +7160,13 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -7180,13 +7180,13 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -7200,13 +7200,13 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -7220,12 +7220,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -7238,12 +7238,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -7256,12 +7256,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -7274,12 +7274,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -7292,12 +7292,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -7310,12 +7310,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -7328,12 +7328,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -7346,12 +7346,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -7364,12 +7364,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -7382,12 +7382,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -7400,7 +7400,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldxp x10, x12, [x9]
@@ -7408,7 +7408,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
@@ -7423,7 +7423,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldaxp x10, x12, [x9]
@@ -7431,7 +7431,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
@@ -7446,7 +7446,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldxp x10, x12, [x9]
@@ -7454,7 +7454,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
@@ -7469,7 +7469,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldaxp x10, x12, [x9]
@@ -7477,7 +7477,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
@@ -7492,7 +7492,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldaxp x10, x12, [x9]
@@ -7500,7 +7500,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
@@ -7515,13 +7515,13 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7535,13 +7535,13 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7555,13 +7555,13 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7575,13 +7575,13 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7595,13 +7595,13 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7616,7 +7616,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7632,7 +7632,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7648,7 +7648,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7664,7 +7664,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7680,7 +7680,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7695,7 +7695,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7709,7 +7709,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7723,7 +7723,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7737,7 +7737,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7751,7 +7751,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7765,7 +7765,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7779,7 +7779,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7793,7 +7793,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7807,7 +7807,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7821,7 +7821,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7835,7 +7835,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7852,7 +7852,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7869,7 +7869,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7886,7 +7886,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7903,7 +7903,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7920,13 +7920,13 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7940,13 +7940,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7960,13 +7960,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7980,13 +7980,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -8000,13 +8000,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -8020,13 +8020,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -8040,13 +8040,13 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -8060,13 +8060,13 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -8080,13 +8080,13 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -8100,13 +8100,13 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -8120,12 +8120,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -8138,12 +8138,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -8156,12 +8156,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -8174,12 +8174,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -8192,12 +8192,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -8210,12 +8210,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -8228,12 +8228,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -8246,12 +8246,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -8264,12 +8264,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -8282,12 +8282,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -8300,7 +8300,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldxp x10, x12, [x9]
@@ -8308,7 +8308,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
@@ -8323,7 +8323,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldaxp x10, x12, [x9]
@@ -8331,7 +8331,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
@@ -8346,7 +8346,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldxp x10, x12, [x9]
@@ -8354,7 +8354,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
@@ -8369,7 +8369,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldaxp x10, x12, [x9]
@@ -8377,7 +8377,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
@@ -8392,7 +8392,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldaxp x10, x12, [x9]
@@ -8400,7 +8400,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
@@ -8415,13 +8415,13 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -8435,13 +8435,13 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -8455,13 +8455,13 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -8475,13 +8475,13 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -8495,13 +8495,13 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -8516,7 +8516,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8532,7 +8532,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8548,7 +8548,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8564,7 +8564,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8580,7 +8580,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8595,7 +8595,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8609,7 +8609,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8623,7 +8623,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8637,7 +8637,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8651,7 +8651,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8665,7 +8665,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8679,7 +8679,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8693,7 +8693,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8707,7 +8707,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8721,7 +8721,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8735,7 +8735,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -8752,7 +8752,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -8769,7 +8769,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -8786,7 +8786,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -8803,7 +8803,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
index 6df602200ebb6..b666995fea648 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
@@ -5,10 +5,10 @@
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
 ; -O1:    ldxrb w0, [x8]
@@ -19,10 +19,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_acquire:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
 ; -O1:    ldaxrb w0, [x8]
@@ -33,10 +33,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_release:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
 ; -O1:    ldxrb w0, [x8]
@@ -47,10 +47,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w0, [x8]
@@ -61,10 +61,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w0, [x8]
@@ -75,10 +75,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
 ; -O1:    ldxrh w0, [x8]
@@ -89,10 +89,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_acquire:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
 ; -O1:    ldaxrh w0, [x8]
@@ -103,10 +103,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_release:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
 ; -O1:    ldxrh w0, [x8]
@@ -117,10 +117,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w0, [x8]
@@ -131,10 +131,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w0, [x8]
@@ -145,10 +145,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_monotonic:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_monotonic:
 ; -O1:    ldxr w0, [x8]
@@ -159,10 +159,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_acquire:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_acquire:
 ; -O1:    ldaxr w0, [x8]
@@ -173,10 +173,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_release:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_release:
 ; -O1:    ldxr w0, [x8]
@@ -187,10 +187,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_acq_rel:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_acq_rel:
 ; -O1:    ldaxr w0, [x8]
@@ -201,10 +201,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_seq_cst:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_seq_cst:
 ; -O1:    ldaxr w0, [x8]
@@ -215,10 +215,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_monotonic:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -229,10 +229,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_acquire:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -243,10 +243,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_release:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -257,10 +257,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_acq_rel:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -271,10 +271,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_seq_cst:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -290,7 +290,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
@@ -307,7 +307,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire:
@@ -324,7 +324,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_release:
@@ -341,7 +341,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
@@ -358,7 +358,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
@@ -370,10 +370,10 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
 ; -O1:    ldxrb w0, [x8]
@@ -384,10 +384,10 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
 ; -O1:    ldaxrb w0, [x8]
@@ -398,10 +398,10 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_release:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
 ; -O1:    ldxrb w0, [x8]
@@ -412,10 +412,10 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w0, [x8]
@@ -426,10 +426,10 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w0, [x8]
@@ -580,11 +580,11 @@ define dso_local i128 @atomicrmw_xchg_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_add_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_monotonic:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -596,11 +596,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_acquire:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -612,11 +612,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_release:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -628,11 +628,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_acq_rel:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -644,11 +644,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_seq_cst:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -660,11 +660,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_monotonic:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -676,11 +676,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_add_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_acquire:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -692,11 +692,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_release:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -708,11 +708,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_acq_rel:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -724,11 +724,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_seq_cst:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -740,11 +740,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_monotonic:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -756,11 +756,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_add_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_acquire:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -772,11 +772,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_release:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -788,11 +788,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_acq_rel:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -804,11 +804,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_seq_cst:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -820,11 +820,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_monotonic:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -836,11 +836,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_add_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_acquire:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -852,11 +852,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_release:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -868,11 +868,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_acq_rel:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -884,11 +884,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_seq_cst:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -906,7 +906,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
@@ -925,7 +925,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
@@ -944,7 +944,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
@@ -963,7 +963,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
@@ -982,7 +982,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
@@ -995,11 +995,11 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_monotonic:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1011,11 +1011,11 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_acquire:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1027,11 +1027,11 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_release:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1043,11 +1043,11 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_acq_rel:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1059,11 +1059,11 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_seq_cst:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -1320,11 +1320,11 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_monotonic:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1336,11 +1336,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_acquire:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1352,11 +1352,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_release:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1368,11 +1368,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_acq_rel:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1384,11 +1384,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_seq_cst:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -1400,11 +1400,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_monotonic:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -1416,11 +1416,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_acquire:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -1432,11 +1432,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_release:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -1448,11 +1448,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_acq_rel:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -1464,11 +1464,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_seq_cst:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -1480,11 +1480,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_monotonic:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -1496,11 +1496,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_acquire:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -1512,11 +1512,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_release:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -1528,11 +1528,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_acq_rel:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -1544,11 +1544,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_seq_cst:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -1560,11 +1560,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_monotonic:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -1576,11 +1576,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_acquire:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -1592,11 +1592,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_release:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -1608,11 +1608,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_acq_rel:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -1624,11 +1624,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_seq_cst:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -1646,7 +1646,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
@@ -1665,7 +1665,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
@@ -1684,7 +1684,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
@@ -1703,7 +1703,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
@@ -1722,7 +1722,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
@@ -1735,11 +1735,11 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_monotonic:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1751,11 +1751,11 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_acquire:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1767,11 +1767,11 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_release:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1783,11 +1783,11 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_acq_rel:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1799,11 +1799,11 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_seq_cst:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2060,11 +2060,11 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_monotonic:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2076,11 +2076,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_acquire:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2092,11 +2092,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_release:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2108,11 +2108,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_acq_rel:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2124,11 +2124,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_seq_cst:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2140,11 +2140,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_monotonic:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -2156,11 +2156,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_acquire:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -2172,11 +2172,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_release:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -2188,11 +2188,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_acq_rel:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -2204,11 +2204,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_seq_cst:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -2220,11 +2220,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_monotonic:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -2236,11 +2236,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_acquire:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -2252,11 +2252,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_release:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -2268,11 +2268,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_acq_rel:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -2284,11 +2284,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_seq_cst:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -2300,11 +2300,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_monotonic:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -2316,11 +2316,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_acquire:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -2332,11 +2332,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_release:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -2348,11 +2348,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_acq_rel:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -2364,11 +2364,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_seq_cst:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -2387,7 +2387,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
@@ -2408,7 +2408,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
@@ -2429,7 +2429,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
@@ -2450,7 +2450,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
@@ -2471,7 +2471,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
@@ -2485,11 +2485,11 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_monotonic:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2501,11 +2501,11 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_acquire:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2517,11 +2517,11 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_release:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2533,11 +2533,11 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2549,11 +2549,11 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2821,11 +2821,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2839,11 +2839,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2857,11 +2857,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2875,11 +2875,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2893,11 +2893,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2911,11 +2911,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -2929,11 +2929,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -2947,11 +2947,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -2965,11 +2965,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -2983,11 +2983,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -3001,11 +3001,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -3019,11 +3019,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -3037,11 +3037,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -3055,11 +3055,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -3073,11 +3073,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -3091,11 +3091,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -3109,11 +3109,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -3127,11 +3127,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -3145,11 +3145,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -3163,11 +3163,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -3189,7 +3189,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
@@ -3214,7 +3214,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
@@ -3239,7 +3239,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
@@ -3264,7 +3264,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
@@ -3289,7 +3289,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
@@ -3306,11 +3306,11 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -3324,11 +3324,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -3342,11 +3342,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -3360,11 +3360,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -3378,11 +3378,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3700,11 +3700,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_or_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_monotonic:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -3716,11 +3716,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_acquire:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -3732,11 +3732,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_release:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -3748,11 +3748,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_acq_rel:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -3764,11 +3764,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_seq_cst:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3780,11 +3780,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_monotonic:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -3796,11 +3796,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_acquire:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -3812,11 +3812,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_release:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -3828,11 +3828,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_acq_rel:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -3844,11 +3844,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_seq_cst:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -3860,11 +3860,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_monotonic:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -3876,11 +3876,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_acquire:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -3892,11 +3892,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_release:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -3908,11 +3908,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_acq_rel:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -3924,11 +3924,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_seq_cst:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -3940,11 +3940,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_monotonic:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -3956,11 +3956,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_acquire:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -3972,11 +3972,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_release:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -3988,11 +3988,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_acq_rel:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4004,11 +4004,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_seq_cst:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4027,7 +4027,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
@@ -4048,7 +4048,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
@@ -4069,7 +4069,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
@@ -4090,7 +4090,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
@@ -4111,7 +4111,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
@@ -4125,11 +4125,11 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_monotonic:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4141,11 +4141,11 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_acquire:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4157,11 +4157,11 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_release:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4173,11 +4173,11 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_acq_rel:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4189,11 +4189,11 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_seq_cst:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -4460,11 +4460,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_monotonic:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4476,11 +4476,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_acquire:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4492,11 +4492,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_release:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4508,11 +4508,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_acq_rel:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4524,11 +4524,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_seq_cst:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -4540,11 +4540,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_monotonic:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -4556,11 +4556,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_acquire:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -4572,11 +4572,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_release:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -4588,11 +4588,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_acq_rel:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -4604,11 +4604,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_seq_cst:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -4620,11 +4620,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_monotonic:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -4636,11 +4636,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_acquire:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -4652,11 +4652,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_release:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -4668,11 +4668,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_acq_rel:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -4684,11 +4684,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_seq_cst:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -4700,11 +4700,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_monotonic:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -4716,11 +4716,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_acquire:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -4732,11 +4732,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_release:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -4748,11 +4748,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_acq_rel:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4764,11 +4764,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_seq_cst:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4787,7 +4787,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
@@ -4808,7 +4808,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
@@ -4829,7 +4829,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
@@ -4850,7 +4850,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
@@ -4871,7 +4871,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
@@ -4885,11 +4885,11 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_monotonic:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4901,11 +4901,11 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_acquire:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4917,11 +4917,11 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_release:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4933,11 +4933,11 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_acq_rel:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4949,11 +4949,11 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_seq_cst:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -5220,13 +5220,13 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5240,13 +5240,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5260,13 +5260,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_release:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5280,13 +5280,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5300,13 +5300,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -5320,13 +5320,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -5340,13 +5340,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -5360,13 +5360,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_release:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -5380,13 +5380,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -5400,13 +5400,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -5420,12 +5420,12 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -5438,12 +5438,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -5456,12 +5456,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_release:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -5474,12 +5474,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -5492,12 +5492,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -5510,12 +5510,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -5528,12 +5528,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -5546,12 +5546,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_release:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -5564,12 +5564,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -5582,12 +5582,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -5600,7 +5600,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldxp x10, x12, [x9]
@@ -5608,7 +5608,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
@@ -5623,7 +5623,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldaxp x10, x12, [x9]
@@ -5631,7 +5631,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
@@ -5646,7 +5646,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldxp x10, x12, [x9]
@@ -5654,7 +5654,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
@@ -5669,7 +5669,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldaxp x10, x12, [x9]
@@ -5677,7 +5677,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
@@ -5692,7 +5692,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldaxp x10, x12, [x9]
@@ -5700,7 +5700,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
@@ -5715,13 +5715,13 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5735,13 +5735,13 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5755,13 +5755,13 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5775,13 +5775,13 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5795,13 +5795,13 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -5816,7 +5816,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5832,7 +5832,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5848,7 +5848,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5864,7 +5864,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5880,7 +5880,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5895,7 +5895,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5909,7 +5909,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5923,7 +5923,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5937,7 +5937,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5951,7 +5951,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5965,7 +5965,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5979,7 +5979,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5993,7 +5993,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6007,7 +6007,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6021,7 +6021,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6035,7 +6035,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6052,7 +6052,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6069,7 +6069,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6086,7 +6086,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6103,7 +6103,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6120,13 +6120,13 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -6140,13 +6140,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -6160,13 +6160,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -6180,13 +6180,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -6200,13 +6200,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6220,13 +6220,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -6240,13 +6240,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -6260,13 +6260,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -6280,13 +6280,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -6300,13 +6300,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -6320,12 +6320,12 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -6338,12 +6338,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -6356,12 +6356,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -6374,12 +6374,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -6392,12 +6392,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -6410,12 +6410,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -6428,12 +6428,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -6446,12 +6446,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -6464,12 +6464,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -6482,12 +6482,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -6500,7 +6500,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldxp x10, x12, [x9]
@@ -6508,7 +6508,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
@@ -6523,7 +6523,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldaxp x10, x12, [x9]
@@ -6531,7 +6531,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
@@ -6546,7 +6546,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldxp x10, x12, [x9]
@@ -6554,7 +6554,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
@@ -6569,7 +6569,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldaxp x10, x12, [x9]
@@ -6577,7 +6577,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
@@ -6592,7 +6592,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldaxp x10, x12, [x9]
@@ -6600,7 +6600,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
@@ -6615,13 +6615,13 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -6635,13 +6635,13 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -6655,13 +6655,13 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -6675,13 +6675,13 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -6695,13 +6695,13 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6716,7 +6716,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6732,7 +6732,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6748,7 +6748,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6764,7 +6764,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6780,7 +6780,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6795,7 +6795,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6809,7 +6809,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6823,7 +6823,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6837,7 +6837,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6851,7 +6851,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6865,7 +6865,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6879,7 +6879,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6893,7 +6893,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6907,7 +6907,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6921,7 +6921,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6935,7 +6935,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -6952,7 +6952,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -6969,7 +6969,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -6986,7 +6986,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -7003,7 +7003,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -7020,13 +7020,13 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7040,13 +7040,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7060,13 +7060,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7080,13 +7080,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7100,13 +7100,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7120,13 +7120,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -7140,13 +7140,13 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -7160,13 +7160,13 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -7180,13 +7180,13 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -7200,13 +7200,13 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -7220,12 +7220,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -7238,12 +7238,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -7256,12 +7256,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -7274,12 +7274,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -7292,12 +7292,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -7310,12 +7310,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -7328,12 +7328,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -7346,12 +7346,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -7364,12 +7364,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -7382,12 +7382,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -7400,7 +7400,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldxp x10, x12, [x9]
@@ -7408,7 +7408,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
@@ -7423,7 +7423,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldaxp x10, x12, [x9]
@@ -7431,7 +7431,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
@@ -7446,7 +7446,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldxp x10, x12, [x9]
@@ -7454,7 +7454,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
@@ -7469,7 +7469,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldaxp x10, x12, [x9]
@@ -7477,7 +7477,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
@@ -7492,7 +7492,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldaxp x10, x12, [x9]
@@ -7500,7 +7500,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
@@ -7515,13 +7515,13 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7535,13 +7535,13 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7555,13 +7555,13 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7575,13 +7575,13 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7595,13 +7595,13 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7616,7 +7616,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7632,7 +7632,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7648,7 +7648,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7664,7 +7664,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7680,7 +7680,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7695,7 +7695,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7709,7 +7709,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7723,7 +7723,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7737,7 +7737,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7751,7 +7751,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7765,7 +7765,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7779,7 +7779,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7793,7 +7793,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7807,7 +7807,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7821,7 +7821,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7835,7 +7835,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7852,7 +7852,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7869,7 +7869,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7886,7 +7886,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7903,7 +7903,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7920,13 +7920,13 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7940,13 +7940,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7960,13 +7960,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7980,13 +7980,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -8000,13 +8000,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -8020,13 +8020,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -8040,13 +8040,13 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -8060,13 +8060,13 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -8080,13 +8080,13 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -8100,13 +8100,13 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -8120,12 +8120,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -8138,12 +8138,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -8156,12 +8156,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -8174,12 +8174,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -8192,12 +8192,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -8210,12 +8210,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -8228,12 +8228,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -8246,12 +8246,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -8264,12 +8264,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -8282,12 +8282,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -8300,7 +8300,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldxp x10, x12, [x9]
@@ -8308,7 +8308,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
@@ -8323,7 +8323,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldaxp x10, x12, [x9]
@@ -8331,7 +8331,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
@@ -8346,7 +8346,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldxp x10, x12, [x9]
@@ -8354,7 +8354,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
@@ -8369,7 +8369,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldaxp x10, x12, [x9]
@@ -8377,7 +8377,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
@@ -8392,7 +8392,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldaxp x10, x12, [x9]
@@ -8400,7 +8400,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
@@ -8415,13 +8415,13 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -8435,13 +8435,13 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -8455,13 +8455,13 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -8475,13 +8475,13 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -8495,13 +8495,13 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -8516,7 +8516,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8532,7 +8532,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8548,7 +8548,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8564,7 +8564,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8580,7 +8580,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8595,7 +8595,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8609,7 +8609,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8623,7 +8623,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8637,7 +8637,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8651,7 +8651,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8665,7 +8665,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8679,7 +8679,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8693,7 +8693,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8707,7 +8707,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8721,7 +8721,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8735,7 +8735,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -8752,7 +8752,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -8769,7 +8769,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -8786,7 +8786,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -8803,7 +8803,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
index 34cd68c94c6a0..660976485b776 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
@@ -146,7 +146,7 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
@@ -161,7 +161,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %val
 define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i128_aligned_acquire:
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire:
@@ -176,7 +176,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i128_aligned_release:
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_release:
@@ -191,7 +191,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
@@ -206,7 +206,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value
 define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
@@ -537,7 +537,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O0:    adds x9, x10, x9
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
@@ -554,7 +554,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O0:    adds x9, x10, x9
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
@@ -571,7 +571,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O0:    adds x9, x10, x9
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
@@ -588,7 +588,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O0:    adds x9, x10, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
@@ -605,7 +605,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O0:    adds x9, x10, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
@@ -1122,7 +1122,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O0:    subs x9, x10, x9
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
@@ -1139,7 +1139,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O0:    subs x9, x10, x9
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
@@ -1156,7 +1156,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O0:    subs x9, x10, x9
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
@@ -1173,7 +1173,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O0:    subs x9, x10, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
@@ -1190,7 +1190,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O0:    subs x9, x10, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
@@ -1668,7 +1668,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    and x2, x11, x12
 ; -O0:    and x9, x10, x9
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
@@ -1687,7 +1687,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    and x2, x11, x12
 ; -O0:    and x9, x10, x9
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
@@ -1706,7 +1706,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    and x2, x11, x12
 ; -O0:    and x9, x10, x9
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
@@ -1725,7 +1725,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    and x2, x11, x12
 ; -O0:    and x9, x10, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
@@ -1744,7 +1744,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    and x2, x11, x12
 ; -O0:    and x9, x10, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
@@ -2058,7 +2058,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casb w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O1:    and w10, w8, w1
@@ -2074,7 +2074,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casab w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O1:    and w10, w8, w1
@@ -2090,7 +2090,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    caslb w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O1:    and w10, w8, w1
@@ -2106,7 +2106,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casalb w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O1:    and w10, w8, w1
@@ -2122,7 +2122,7 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casalb w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O1:    and w10, w8, w1
@@ -2138,7 +2138,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value)
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    cash w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O1:    and w10, w8, w1
@@ -2154,7 +2154,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casah w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O1:    and w10, w8, w1
@@ -2170,7 +2170,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    caslh w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O1:    and w10, w8, w1
@@ -2186,7 +2186,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casalh w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O1:    and w10, w8, w1
@@ -2202,7 +2202,7 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casalh w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxth
+; -O0:    cmp w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O1:    and w10, w8, w1
@@ -2218,7 +2218,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value)
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    cas w8, w10, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O1:    and w10, w8, w1
@@ -2234,7 +2234,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casa w8, w10, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O1:    and w10, w8, w1
@@ -2250,7 +2250,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casl w8, w10, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O1:    and w10, w8, w1
@@ -2266,7 +2266,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casal w8, w10, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O1:    and w10, w8, w1
@@ -2282,7 +2282,7 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casal w8, w10, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O1:    and w10, w8, w1
@@ -2298,7 +2298,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value)
 ; -O0:    and x8, x9, x8
 ; -O0:    mvn x10, x8
 ; -O0:    cas x8, x10, [x11]
-; -O0:    subs x9, x8, x9
+; -O0:    cmp x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O1:    and x10, x8, x1
@@ -2314,7 +2314,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0:    and x8, x9, x8
 ; -O0:    mvn x10, x8
 ; -O0:    casa x8, x10, [x11]
-; -O0:    subs x9, x8, x9
+; -O0:    cmp x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O1:    and x10, x8, x1
@@ -2330,7 +2330,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0:    and x8, x9, x8
 ; -O0:    mvn x10, x8
 ; -O0:    casl x8, x10, [x11]
-; -O0:    subs x9, x8, x9
+; -O0:    cmp x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O1:    and x10, x8, x1
@@ -2346,7 +2346,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0:    and x8, x9, x8
 ; -O0:    mvn x10, x8
 ; -O0:    casal x8, x10, [x11]
-; -O0:    subs x9, x8, x9
+; -O0:    cmp x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O1:    and x10, x8, x1
@@ -2362,7 +2362,7 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0:    and x8, x9, x8
 ; -O0:    mvn x10, x8
 ; -O0:    casal x8, x10, [x11]
-; -O0:    subs x9, x8, x9
+; -O0:    cmp x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O1:    and x10, x8, x1
@@ -2380,7 +2380,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    mvn x2, x12
 ; -O0:    mvn x9, x9
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
@@ -2403,7 +2403,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    mvn x2, x12
 ; -O0:    mvn x9, x9
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
@@ -2426,7 +2426,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    mvn x2, x12
 ; -O0:    mvn x9, x9
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
@@ -2449,7 +2449,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    mvn x2, x12
 ; -O0:    mvn x9, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
@@ -2472,7 +2472,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    mvn x2, x12
 ; -O0:    mvn x9, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
@@ -2493,7 +2493,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casb w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O1:    and w10, w8, w1
@@ -2509,7 +2509,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casab w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O1:    and w10, w8, w1
@@ -2525,7 +2525,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    caslb w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O1:    and w10, w8, w1
@@ -2541,7 +2541,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casalb w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O1:    and w10, w8, w1
@@ -2557,7 +2557,7 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0:    and w8, w9, w8
 ; -O0:    mvn w10, w8
 ; -O0:    casalb w8, w10, [x11]
-; -O0:    subs w9, w8, w9, uxtb
+; -O0:    cmp w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O1:    and w10, w8, w1
@@ -3018,7 +3018,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
 ; -O0:    orr x2, x11, x12
 ; -O0:    orr x9, x10, x9
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
@@ -3037,7 +3037,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    orr x2, x11, x12
 ; -O0:    orr x9, x10, x9
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
@@ -3056,7 +3056,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    orr x2, x11, x12
 ; -O0:    orr x9, x10, x9
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
@@ -3075,7 +3075,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    orr x2, x11, x12
 ; -O0:    orr x9, x10, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
@@ -3094,7 +3094,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    orr x2, x11, x12
 ; -O0:    orr x9, x10, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
@@ -3543,7 +3543,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    eor x2, x11, x12
 ; -O0:    eor x9, x10, x9
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
@@ -3562,7 +3562,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    eor x2, x11, x12
 ; -O0:    eor x9, x10, x9
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
@@ -3581,7 +3581,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    eor x2, x11, x12
 ; -O0:    eor x9, x10, x9
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
@@ -3600,7 +3600,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    eor x2, x11, x12
 ; -O0:    eor x9, x10, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
@@ -3619,7 +3619,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    eor x2, x11, x12
 ; -O0:    eor x9, x10, x9
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
@@ -4065,11 +4065,11 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    csel x2, x11, x12, lt
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
@@ -4086,11 +4086,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    csel x2, x11, x12, lt
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
@@ -4107,11 +4107,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    csel x2, x11, x12, lt
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
@@ -4128,11 +4128,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    csel x2, x11, x12, lt
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
@@ -4149,11 +4149,11 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    csel x2, x11, x12, lt
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
@@ -4206,7 +4206,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4222,7 +4222,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4238,7 +4238,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4254,7 +4254,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4270,7 +4270,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4285,7 +4285,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4299,7 +4299,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4313,7 +4313,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4327,7 +4327,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4341,7 +4341,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4355,7 +4355,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4369,7 +4369,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4383,7 +4383,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4397,7 +4397,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4411,7 +4411,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4425,7 +4425,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -4442,7 +4442,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -4459,7 +4459,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -4476,7 +4476,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -4493,7 +4493,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -4650,11 +4650,11 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    csel x2, x11, x12, ge
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
@@ -4671,11 +4671,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    csel x2, x11, x12, ge
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
@@ -4692,11 +4692,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    csel x2, x11, x12, ge
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
@@ -4713,11 +4713,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    csel x2, x11, x12, ge
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
@@ -4734,11 +4734,11 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    csel x2, x11, x12, ge
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
@@ -4791,7 +4791,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4807,7 +4807,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4823,7 +4823,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4839,7 +4839,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4855,7 +4855,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4870,7 +4870,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4884,7 +4884,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4898,7 +4898,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4912,7 +4912,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4926,7 +4926,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4940,7 +4940,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4954,7 +4954,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4968,7 +4968,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4982,7 +4982,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -4996,7 +4996,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5010,7 +5010,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -5027,7 +5027,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -5044,7 +5044,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -5061,7 +5061,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -5078,7 +5078,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -5235,11 +5235,11 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    csel x2, x11, x12, lo
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
@@ -5256,11 +5256,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    csel x2, x11, x12, lo
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
@@ -5277,11 +5277,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    csel x2, x11, x12, lo
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
@@ -5298,11 +5298,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    csel x2, x11, x12, lo
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
@@ -5319,11 +5319,11 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    csel x2, x11, x12, lo
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
@@ -5376,7 +5376,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5392,7 +5392,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5408,7 +5408,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5424,7 +5424,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5440,7 +5440,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5455,7 +5455,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5469,7 +5469,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5483,7 +5483,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5497,7 +5497,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5511,7 +5511,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5525,7 +5525,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5539,7 +5539,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5553,7 +5553,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5567,7 +5567,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5581,7 +5581,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5595,7 +5595,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -5612,7 +5612,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -5629,7 +5629,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -5646,7 +5646,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -5663,7 +5663,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -5820,11 +5820,11 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    csel x2, x11, x12, hs
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
@@ -5841,11 +5841,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    csel x2, x11, x12, hs
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
@@ -5862,11 +5862,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    csel x2, x11, x12, hs
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
@@ -5883,11 +5883,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    csel x2, x11, x12, hs
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
@@ -5904,11 +5904,11 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
-; -O0:    subs x13, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    csel x2, x11, x12, hs
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x9, x11
+; -O0:    cmp x9, x11
 ; -O0:    ccmp x8, x10, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
@@ -5961,7 +5961,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5977,7 +5977,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5993,7 +5993,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6009,7 +6009,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6025,7 +6025,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6040,7 +6040,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6054,7 +6054,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6068,7 +6068,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6082,7 +6082,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6096,7 +6096,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6110,7 +6110,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6124,7 +6124,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6138,7 +6138,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6152,7 +6152,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6166,7 +6166,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6180,7 +6180,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -6197,7 +6197,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -6214,7 +6214,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -6231,7 +6231,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -6248,7 +6248,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll
index 60fd7dab58220..ec2d7b1afc0c6 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll
@@ -5,10 +5,10 @@
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
 ; -O1:    ldxrb w0, [x8]
@@ -19,10 +19,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_acquire:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acquire:
 ; -O1:    ldaxrb w0, [x8]
@@ -33,10 +33,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_release:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_release:
 ; -O1:    ldxrb w0, [x8]
@@ -47,10 +47,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w0, [x8]
@@ -61,10 +61,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w0, [x8]
@@ -75,10 +75,10 @@ define dso_local i8 @atomicrmw_xchg_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_monotonic:
 ; -O1:    ldxrh w0, [x8]
@@ -89,10 +89,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_acquire:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acquire:
 ; -O1:    ldaxrh w0, [x8]
@@ -103,10 +103,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_release:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_release:
 ; -O1:    ldxrh w0, [x8]
@@ -117,10 +117,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w0, [x8]
@@ -131,10 +131,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xchg_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w0, [x8]
@@ -145,10 +145,10 @@ define dso_local i16 @atomicrmw_xchg_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_monotonic:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_monotonic:
 ; -O1:    ldxr w0, [x8]
@@ -159,10 +159,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_acquire:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_acquire:
 ; -O1:    ldaxr w0, [x8]
@@ -173,10 +173,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_release:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_release:
 ; -O1:    ldxr w0, [x8]
@@ -187,10 +187,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_acq_rel:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_acq_rel:
 ; -O1:    ldaxr w0, [x8]
@@ -201,10 +201,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xchg_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i32_aligned_seq_cst:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i32_aligned_seq_cst:
 ; -O1:    ldaxr w0, [x8]
@@ -215,10 +215,10 @@ define dso_local i32 @atomicrmw_xchg_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_monotonic:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -229,10 +229,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_acquire:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -243,10 +243,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_release:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -257,10 +257,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_acq_rel:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -271,10 +271,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xchg_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i64_aligned_seq_cst:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xchg_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -290,7 +290,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
@@ -307,7 +307,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire:
@@ -324,7 +324,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_release:
@@ -341,7 +341,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
@@ -358,7 +358,7 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
@@ -370,10 +370,10 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_monotonic:
 ; -O1:    ldxrb w0, [x8]
@@ -384,10 +384,10 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acquire:
 ; -O1:    ldaxrb w0, [x8]
@@ -398,10 +398,10 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_release:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_release:
 ; -O1:    ldxrb w0, [x8]
@@ -412,10 +412,10 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w0, [x8]
@@ -426,10 +426,10 @@ define dso_local i8 @atomicrmw_xchg_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xchg_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
-; -O0:    ldaxrb w8, [x11]
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xchg_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w0, [x8]
@@ -580,11 +580,11 @@ define dso_local i128 @atomicrmw_xchg_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_add_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_monotonic:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -596,11 +596,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_acquire:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -612,11 +612,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_release:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -628,11 +628,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_acq_rel:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -644,11 +644,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_aligned_seq_cst:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -660,11 +660,11 @@ define dso_local i8 @atomicrmw_add_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_monotonic:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -676,11 +676,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_add_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_acquire:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -692,11 +692,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_release:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -708,11 +708,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_acq_rel:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -724,11 +724,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_add_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_add_i16_aligned_seq_cst:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_add_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -740,11 +740,11 @@ define dso_local i16 @atomicrmw_add_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_monotonic:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -756,11 +756,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_add_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_acquire:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -772,11 +772,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_release:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -788,11 +788,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_acq_rel:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -804,11 +804,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_add_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_add_i32_aligned_seq_cst:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_add_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -820,11 +820,11 @@ define dso_local i32 @atomicrmw_add_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_monotonic:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -836,11 +836,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_add_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_acquire:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -852,11 +852,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_release:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -868,11 +868,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_acq_rel:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -884,11 +884,11 @@ define dso_local i64 @atomicrmw_add_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_add_i64_aligned_seq_cst:
-; -O0:    add x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    add x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_add_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -906,7 +906,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
@@ -925,7 +925,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
@@ -944,7 +944,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
@@ -963,7 +963,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
@@ -982,7 +982,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
@@ -995,11 +995,11 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_monotonic:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1011,11 +1011,11 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_acquire:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1027,11 +1027,11 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_release:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1043,11 +1043,11 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_acq_rel:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1059,11 +1059,11 @@ define dso_local i8 @atomicrmw_add_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_add_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_add_i8_unaligned_seq_cst:
-; -O0:    add w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    add w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_add_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -1320,11 +1320,11 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_monotonic:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1336,11 +1336,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_acquire:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1352,11 +1352,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_release:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1368,11 +1368,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_acq_rel:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1384,11 +1384,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_aligned_seq_cst:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -1400,11 +1400,11 @@ define dso_local i8 @atomicrmw_sub_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_monotonic:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -1416,11 +1416,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_acquire:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -1432,11 +1432,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_release:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -1448,11 +1448,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_acq_rel:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -1464,11 +1464,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_sub_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_sub_i16_aligned_seq_cst:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -1480,11 +1480,11 @@ define dso_local i16 @atomicrmw_sub_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_monotonic:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -1496,11 +1496,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_acquire:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -1512,11 +1512,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_release:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -1528,11 +1528,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_acq_rel:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -1544,11 +1544,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_sub_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_sub_i32_aligned_seq_cst:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -1560,11 +1560,11 @@ define dso_local i32 @atomicrmw_sub_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_monotonic:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -1576,11 +1576,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_acquire:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -1592,11 +1592,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_release:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -1608,11 +1608,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_acq_rel:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -1624,11 +1624,11 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_sub_i64_aligned_seq_cst:
-; -O0:    subs x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    subs x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -1646,7 +1646,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
@@ -1665,7 +1665,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
@@ -1684,7 +1684,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
@@ -1703,7 +1703,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
@@ -1722,7 +1722,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
@@ -1735,11 +1735,11 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_monotonic:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -1751,11 +1751,11 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_acquire:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -1767,11 +1767,11 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_release:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -1783,11 +1783,11 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_acq_rel:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -1799,11 +1799,11 @@ define dso_local i8 @atomicrmw_sub_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_sub_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_sub_i8_unaligned_seq_cst:
-; -O0:    subs w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    subs w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_sub_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2060,11 +2060,11 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_monotonic:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2076,11 +2076,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_acquire:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2092,11 +2092,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_release:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2108,11 +2108,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_acq_rel:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2124,11 +2124,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_aligned_seq_cst:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2140,11 +2140,11 @@ define dso_local i8 @atomicrmw_and_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_monotonic:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -2156,11 +2156,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_acquire:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -2172,11 +2172,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_release:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -2188,11 +2188,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_acq_rel:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -2204,11 +2204,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_and_i16_aligned_seq_cst:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_and_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -2220,11 +2220,11 @@ define dso_local i16 @atomicrmw_and_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_monotonic:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -2236,11 +2236,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_acquire:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -2252,11 +2252,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_release:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -2268,11 +2268,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_acq_rel:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -2284,11 +2284,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_and_i32_aligned_seq_cst:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_and_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -2300,11 +2300,11 @@ define dso_local i32 @atomicrmw_and_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_monotonic:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -2316,11 +2316,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_acquire:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -2332,11 +2332,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_release:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -2348,11 +2348,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_acq_rel:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -2364,11 +2364,11 @@ define dso_local i64 @atomicrmw_and_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_and_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_and_i64_aligned_seq_cst:
-; -O0:    and x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    and x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_and_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -2387,7 +2387,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
@@ -2408,7 +2408,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
@@ -2429,7 +2429,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
@@ -2450,7 +2450,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
@@ -2471,7 +2471,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
@@ -2485,11 +2485,11 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_monotonic:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2501,11 +2501,11 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_acquire:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2517,11 +2517,11 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_release:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2533,11 +2533,11 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2549,11 +2549,11 @@ define dso_local i8 @atomicrmw_and_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_and_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
-; -O0:    and w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    and w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_and_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2821,11 +2821,11 @@ define dso_local i128 @atomicrmw_and_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -2839,11 +2839,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -2857,11 +2857,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -2875,11 +2875,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -2893,11 +2893,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -2911,11 +2911,11 @@ define dso_local i8 @atomicrmw_nand_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -2929,11 +2929,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_monotonic(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -2947,11 +2947,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acquire(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -2965,11 +2965,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_release(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -2983,11 +2983,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -3001,11 +3001,11 @@ define dso_local i16 @atomicrmw_nand_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -3019,11 +3019,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_monotonic(ptr %ptr, i32 %value)
 define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -3037,11 +3037,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acquire(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -3055,11 +3055,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_release(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -3073,11 +3073,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -3091,11 +3091,11 @@ define dso_local i32 @atomicrmw_nand_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -3109,11 +3109,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_monotonic(ptr %ptr, i64 %value)
 define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -3127,11 +3127,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acquire(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -3145,11 +3145,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_release(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -3163,11 +3163,11 @@ define dso_local i64 @atomicrmw_nand_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 define dso_local i64 @atomicrmw_nand_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O0:    and x8, x9, x8
-; -O0:    mvn x12, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    mvn x11, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -3189,7 +3189,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
@@ -3214,7 +3214,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
@@ -3239,7 +3239,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
@@ -3264,7 +3264,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
@@ -3289,7 +3289,7 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
@@ -3306,11 +3306,11 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -3324,11 +3324,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -3342,11 +3342,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -3360,11 +3360,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_release(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -3378,11 +3378,11 @@ define dso_local i8 @atomicrmw_nand_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 define dso_local i8 @atomicrmw_nand_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O0:    and w8, w9, w8
-; -O0:    mvn w12, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    mvn w11, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_nand_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3700,11 +3700,11 @@ define dso_local i128 @atomicrmw_nand_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_or_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_monotonic:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -3716,11 +3716,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_acquire:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -3732,11 +3732,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_release:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -3748,11 +3748,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_acq_rel:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -3764,11 +3764,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_aligned_seq_cst:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -3780,11 +3780,11 @@ define dso_local i8 @atomicrmw_or_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_monotonic:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -3796,11 +3796,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_acquire:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -3812,11 +3812,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_release:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -3828,11 +3828,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_acq_rel:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -3844,11 +3844,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_or_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_or_i16_aligned_seq_cst:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_or_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -3860,11 +3860,11 @@ define dso_local i16 @atomicrmw_or_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_monotonic:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -3876,11 +3876,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_acquire:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -3892,11 +3892,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_release:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -3908,11 +3908,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_acq_rel:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -3924,11 +3924,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_or_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_or_i32_aligned_seq_cst:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_or_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -3940,11 +3940,11 @@ define dso_local i32 @atomicrmw_or_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_monotonic:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -3956,11 +3956,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_acquire:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -3972,11 +3972,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_release:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -3988,11 +3988,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_acq_rel:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4004,11 +4004,11 @@ define dso_local i64 @atomicrmw_or_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_or_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_or_i64_aligned_seq_cst:
-; -O0:    orr x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    orr x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_or_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4027,7 +4027,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
@@ -4048,7 +4048,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
@@ -4069,7 +4069,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
@@ -4090,7 +4090,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
@@ -4111,7 +4111,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
@@ -4125,11 +4125,11 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_monotonic:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4141,11 +4141,11 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_acquire:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4157,11 +4157,11 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_release:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4173,11 +4173,11 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_acq_rel:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4189,11 +4189,11 @@ define dso_local i8 @atomicrmw_or_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_or_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_or_i8_unaligned_seq_cst:
-; -O0:    orr w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    orr w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_or_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -4460,11 +4460,11 @@ define dso_local i128 @atomicrmw_or_i128_unaligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_monotonic:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4476,11 +4476,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_acquire:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4492,11 +4492,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_release:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4508,11 +4508,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_acq_rel:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4524,11 +4524,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_aligned_seq_cst:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -4540,11 +4540,11 @@ define dso_local i8 @atomicrmw_xor_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_monotonic:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -4556,11 +4556,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_acquire:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -4572,11 +4572,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_release:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -4588,11 +4588,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_acq_rel:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -4604,11 +4604,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_xor_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_xor_i16_aligned_seq_cst:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrh w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -4620,11 +4620,11 @@ define dso_local i16 @atomicrmw_xor_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_monotonic:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -4636,11 +4636,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_acquire:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -4652,11 +4652,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_release:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -4668,11 +4668,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_acq_rel:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -4684,11 +4684,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_xor_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_xor_i32_aligned_seq_cst:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxr w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -4700,11 +4700,11 @@ define dso_local i32 @atomicrmw_xor_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_monotonic:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -4716,11 +4716,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_acquire:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -4732,11 +4732,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_release:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -4748,11 +4748,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_acq_rel:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -4764,11 +4764,11 @@ define dso_local i64 @atomicrmw_xor_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_xor_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_xor_i64_aligned_seq_cst:
-; -O0:    eor x12, x9, x8
-; -O0:    ldaxr x8, [x11]
+; -O0:    eor x11, x9, x8
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -4787,7 +4787,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
@@ -4808,7 +4808,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
@@ -4829,7 +4829,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
@@ -4850,7 +4850,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
@@ -4871,7 +4871,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
@@ -4885,11 +4885,11 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_monotonic:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_monotonic:
 ; -O1:    ldxrb w8, [x0]
@@ -4901,11 +4901,11 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_acquire:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_acquire:
 ; -O1:    ldaxrb w8, [x0]
@@ -4917,11 +4917,11 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_release:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_release:
 ; -O1:    ldxrb w8, [x0]
@@ -4933,11 +4933,11 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_acq_rel:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w8, [x0]
@@ -4949,11 +4949,11 @@ define dso_local i8 @atomicrmw_xor_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_xor_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_xor_i8_unaligned_seq_cst:
-; -O0:    eor w12, w9, w8
-; -O0:    ldaxrb w8, [x11]
+; -O0:    eor w11, w9, w8
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_xor_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w8, [x0]
@@ -5220,13 +5220,13 @@ define dso_local i128 @atomicrmw_xor_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_monotonic:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5240,13 +5240,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acquire:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5260,13 +5260,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_release:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5280,13 +5280,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_acq_rel:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5300,13 +5300,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_aligned_seq_cst:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -5320,13 +5320,13 @@ define dso_local i8 @atomicrmw_max_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_monotonic:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -5340,13 +5340,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acquire:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -5360,13 +5360,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_release:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -5380,13 +5380,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_acq_rel:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -5400,13 +5400,13 @@ define dso_local i16 @atomicrmw_max_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_aligned_seq_cst:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_max_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -5420,12 +5420,12 @@ define dso_local i16 @atomicrmw_max_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_monotonic:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -5438,12 +5438,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acquire:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -5456,12 +5456,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_release:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -5474,12 +5474,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_acq_rel:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -5492,12 +5492,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_aligned_seq_cst:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_max_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -5510,12 +5510,12 @@ define dso_local i32 @atomicrmw_max_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_monotonic:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -5528,12 +5528,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acquire:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -5546,12 +5546,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_release:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -5564,12 +5564,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_acq_rel:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -5582,12 +5582,12 @@ define dso_local i64 @atomicrmw_max_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_aligned_seq_cst:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, gt
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, gt
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_max_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -5600,7 +5600,7 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldxp x10, x12, [x9]
@@ -5608,7 +5608,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
@@ -5623,7 +5623,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldaxp x10, x12, [x9]
@@ -5631,7 +5631,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
@@ -5646,7 +5646,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldxp x10, x12, [x9]
@@ -5654,7 +5654,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
@@ -5669,7 +5669,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldaxp x10, x12, [x9]
@@ -5677,7 +5677,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
@@ -5692,7 +5692,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lt
 ; -O0:    csel x14, x11, x8, lt
 ; -O0:    ldaxp x10, x12, [x9]
@@ -5700,7 +5700,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
@@ -5715,13 +5715,13 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_monotonic:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -5735,13 +5735,13 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acquire:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -5755,13 +5755,13 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_release:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -5775,13 +5775,13 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -5795,13 +5795,13 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, gt
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, gt
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_max_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -5816,7 +5816,7 @@ define dso_local i8 @atomicrmw_max_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5832,7 +5832,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5848,7 +5848,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5864,7 +5864,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5880,7 +5880,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5895,7 +5895,7 @@ define dso_local i16 @atomicrmw_max_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5909,7 +5909,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5923,7 +5923,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5937,7 +5937,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5951,7 +5951,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5965,7 +5965,7 @@ define dso_local i32 @atomicrmw_max_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5979,7 +5979,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -5993,7 +5993,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6007,7 +6007,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6021,7 +6021,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, gt
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6035,7 +6035,7 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6052,7 +6052,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6069,7 +6069,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6086,7 +6086,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6103,7 +6103,7 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lt
 ; -O0:    csel x9, x10, x9, lt
 ; -O0:    bl __atomic_compare_exchange
@@ -6120,13 +6120,13 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_monotonic:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -6140,13 +6140,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acquire:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -6160,13 +6160,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_release:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -6180,13 +6180,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_acq_rel:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -6200,13 +6200,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_aligned_seq_cst:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_aligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6220,13 +6220,13 @@ define dso_local i8 @atomicrmw_min_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_monotonic:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -6240,13 +6240,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acquire:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -6260,13 +6260,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_release:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -6280,13 +6280,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_acq_rel:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -6300,13 +6300,13 @@ define dso_local i16 @atomicrmw_min_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_aligned_seq_cst:
-; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrh w8, [x11]
+; -O0:    sxth w11, w9
+; -O0:    cmp w11, w8, sxth
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_min_i16_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -6320,12 +6320,12 @@ define dso_local i16 @atomicrmw_min_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_monotonic:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -6338,12 +6338,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acquire:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -6356,12 +6356,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_release:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -6374,12 +6374,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_acq_rel:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -6392,12 +6392,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_aligned_seq_cst:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_min_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -6410,12 +6410,12 @@ define dso_local i32 @atomicrmw_min_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_monotonic:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -6428,12 +6428,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acquire:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -6446,12 +6446,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_release:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -6464,12 +6464,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_acq_rel:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -6482,12 +6482,12 @@ define dso_local i64 @atomicrmw_min_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_aligned_seq_cst:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, le
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, le
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_min_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -6500,7 +6500,7 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldxp x10, x12, [x9]
@@ -6508,7 +6508,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
@@ -6523,7 +6523,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldaxp x10, x12, [x9]
@@ -6531,7 +6531,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
@@ -6546,7 +6546,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldxp x10, x12, [x9]
@@ -6554,7 +6554,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
@@ -6569,7 +6569,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldaxp x10, x12, [x9]
@@ -6577,7 +6577,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
@@ -6592,7 +6592,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, ge
 ; -O0:    csel x14, x11, x8, ge
 ; -O0:    ldaxp x10, x12, [x9]
@@ -6600,7 +6600,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
@@ -6615,13 +6615,13 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_monotonic:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_monotonic:
 ; -O1:    ldxrb w9, [x0]
@@ -6635,13 +6635,13 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acquire:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acquire:
 ; -O1:    ldaxrb w9, [x0]
@@ -6655,13 +6655,13 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_release:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_release:
 ; -O1:    ldxrb w9, [x0]
@@ -6675,13 +6675,13 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_acq_rel:
 ; -O1:    ldaxrb w9, [x0]
@@ -6695,13 +6695,13 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
-; -O0:    sxtb w10, w9
-; -O0:    subs w10, w10, w8, sxtb
-; -O0:    csel w12, w9, w8, le
-; -O0:    ldaxrb w8, [x11]
+; -O0:    sxtb w11, w9
+; -O0:    cmp w11, w8, sxtb
+; -O0:    csel w11, w9, w8, le
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w9, uxtb
+; -O0:    stlxrb wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxtb
-; -O0:    stlxrb w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxtb
 ;
 ; -O1-LABEL: atomicrmw_min_i8_unaligned_seq_cst:
 ; -O1:    ldaxrb w9, [x0]
@@ -6716,7 +6716,7 @@ define dso_local i8 @atomicrmw_min_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6732,7 +6732,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_monotonic(ptr %ptr, i16 %value
 define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6748,7 +6748,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6764,7 +6764,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6780,7 +6780,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O0:    sxth w10, w9
-; -O0:    subs w10, w10, w8, sxth
+; -O0:    cmp w10, w8, sxth
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6795,7 +6795,7 @@ define dso_local i16 @atomicrmw_min_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6809,7 +6809,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_monotonic(ptr %ptr, i32 %value
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6823,7 +6823,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6837,7 +6837,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6851,7 +6851,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6865,7 +6865,7 @@ define dso_local i32 @atomicrmw_min_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6879,7 +6879,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_monotonic(ptr %ptr, i64 %value
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6893,7 +6893,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6907,7 +6907,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6921,7 +6921,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, le
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -6935,7 +6935,7 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -6952,7 +6952,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -6969,7 +6969,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -6986,7 +6986,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -7003,7 +7003,7 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, ge
 ; -O0:    csel x9, x10, x9, ge
 ; -O0:    bl __atomic_compare_exchange
@@ -7020,13 +7020,13 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %valu
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_monotonic:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7040,13 +7040,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acquire:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7060,13 +7060,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_release:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7080,13 +7080,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7100,13 +7100,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7120,13 +7120,13 @@ define dso_local i8 @atomicrmw_umax_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_monotonic:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -7140,13 +7140,13 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acquire:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -7160,13 +7160,13 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_release:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -7180,13 +7180,13 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -7200,13 +7200,13 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -7220,12 +7220,12 @@ define dso_local i16 @atomicrmw_umax_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_monotonic:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -7238,12 +7238,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acquire:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -7256,12 +7256,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_release:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -7274,12 +7274,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -7292,12 +7292,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, hi
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, hi
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -7310,12 +7310,12 @@ define dso_local i32 @atomicrmw_umax_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_monotonic:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -7328,12 +7328,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acquire:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -7346,12 +7346,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_release:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -7364,12 +7364,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -7382,12 +7382,12 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, hi
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, hi
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -7400,7 +7400,7 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldxp x10, x12, [x9]
@@ -7408,7 +7408,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
@@ -7423,7 +7423,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldaxp x10, x12, [x9]
@@ -7431,7 +7431,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
@@ -7446,7 +7446,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldxp x10, x12, [x9]
@@ -7454,7 +7454,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
@@ -7469,7 +7469,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldaxp x10, x12, [x9]
@@ -7477,7 +7477,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
@@ -7492,7 +7492,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, lo
 ; -O0:    csel x14, x11, x8, lo
 ; -O0:    ldaxp x10, x12, [x9]
@@ -7500,7 +7500,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
@@ -7515,13 +7515,13 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7535,13 +7535,13 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acquire:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7555,13 +7555,13 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_release:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7575,13 +7575,13 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -7595,13 +7595,13 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, hi
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, hi
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umax_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -7616,7 +7616,7 @@ define dso_local i8 @atomicrmw_umax_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7632,7 +7632,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7648,7 +7648,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7664,7 +7664,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7680,7 +7680,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7695,7 +7695,7 @@ define dso_local i16 @atomicrmw_umax_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7709,7 +7709,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7723,7 +7723,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7737,7 +7737,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7751,7 +7751,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7765,7 +7765,7 @@ define dso_local i32 @atomicrmw_umax_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7779,7 +7779,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7793,7 +7793,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7807,7 +7807,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7821,7 +7821,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, hi
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -7835,7 +7835,7 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7852,7 +7852,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7869,7 +7869,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7886,7 +7886,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7903,7 +7903,7 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, lo
 ; -O0:    csel x9, x10, x9, lo
 ; -O0:    bl __atomic_compare_exchange
@@ -7920,13 +7920,13 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %val
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_monotonic:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -7940,13 +7940,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_monotonic(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acquire:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -7960,13 +7960,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_release:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -7980,13 +7980,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -8000,13 +8000,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -8020,13 +8020,13 @@ define dso_local i8 @atomicrmw_umin_i8_aligned_seq_cst(ptr %ptr, i8 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_monotonic:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_monotonic:
 ; -O1:    and w9, w1, #0xffff
@@ -8040,13 +8040,13 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_monotonic(ptr %ptr, i16 %value)
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acquire:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acquire:
 ; -O1:    and w9, w1, #0xffff
@@ -8060,13 +8060,13 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acquire(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_release:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_release:
 ; -O1:    and w9, w1, #0xffff
@@ -8080,13 +8080,13 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_release(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_acq_rel:
 ; -O1:    and w9, w1, #0xffff
@@ -8100,13 +8100,13 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_acq_rel(ptr %ptr, i16 %value) {
 
 define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
-; -O0:    and w9, w12, #0xffff
-; -O0:    subs w10, w9, w8, uxth
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrh w8, [x11]
-; -O0:    cmp w8, w12, uxth
-; -O0:    stlxrh w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xffff
+; -O0:    cmp w9, w8, uxth
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w11, uxth
+; -O0:    stlxrh wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_aligned_seq_cst:
 ; -O1:    and w9, w1, #0xffff
@@ -8120,12 +8120,12 @@ define dso_local i16 @atomicrmw_umin_i16_aligned_seq_cst(ptr %ptr, i16 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_monotonic:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -8138,12 +8138,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_monotonic(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acquire:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -8156,12 +8156,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acquire(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_release:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -8174,12 +8174,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_release(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -8192,12 +8192,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_acq_rel(ptr %ptr, i32 %value) {
 
 define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
-; -O0:    subs w10, w9, w8
-; -O0:    csel w12, w9, w8, ls
-; -O0:    ldaxr w8, [x11]
+; -O0:    cmp w9, w8
+; -O0:    csel w11, w9, w8, ls
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -8210,12 +8210,12 @@ define dso_local i32 @atomicrmw_umin_i32_aligned_seq_cst(ptr %ptr, i32 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_monotonic:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_monotonic:
 ; -O1:    ldxr x0, [x8]
@@ -8228,12 +8228,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_monotonic(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acquire:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acquire:
 ; -O1:    ldaxr x0, [x8]
@@ -8246,12 +8246,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acquire(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_release:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_release:
 ; -O1:    ldxr x0, [x8]
@@ -8264,12 +8264,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_release(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_acq_rel:
 ; -O1:    ldaxr x0, [x8]
@@ -8282,12 +8282,12 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_acq_rel(ptr %ptr, i64 %value) {
 
 define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
-; -O0:    subs x10, x9, x8
-; -O0:    csel x12, x9, x8, ls
-; -O0:    ldaxr x8, [x11]
+; -O0:    cmp x9, x8
+; -O0:    csel x11, x9, x8, ls
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_aligned_seq_cst:
 ; -O1:    ldaxr x0, [x8]
@@ -8300,7 +8300,7 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldxp x10, x12, [x9]
@@ -8308,7 +8308,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
@@ -8323,7 +8323,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldaxp x10, x12, [x9]
@@ -8331,7 +8331,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stxp w8, x14, x15, [x9]
 ; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
@@ -8346,7 +8346,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldxp x10, x12, [x9]
@@ -8354,7 +8354,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
@@ -8369,7 +8369,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldaxp x10, x12, [x9]
@@ -8377,7 +8377,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
@@ -8392,7 +8392,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
-; -O0:    subs x12, x8, x11
+; -O0:    cmp x8, x11
 ; -O0:    csel x15, x13, x10, hs
 ; -O0:    csel x14, x11, x8, hs
 ; -O0:    ldaxp x10, x12, [x9]
@@ -8400,7 +8400,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O0:    cmp x12, x13
 ; -O0:    stlxp w8, x14, x15, [x9]
 ; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
+; -O0:    cmp x12, x13
 ; -O0:    ccmp x10, x11, #0, eq
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
@@ -8415,13 +8415,13 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_monotonic:
 ; -O1:    and w9, w1, #0xff
@@ -8435,13 +8435,13 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_monotonic(ptr %ptr, i8 %value)
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acquire:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acquire:
 ; -O1:    and w9, w1, #0xff
@@ -8455,13 +8455,13 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acquire(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_release:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_release:
 ; -O1:    and w9, w1, #0xff
@@ -8475,13 +8475,13 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_release(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_acq_rel:
 ; -O1:    and w9, w1, #0xff
@@ -8495,13 +8495,13 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_acq_rel(ptr %ptr, i8 %value) {
 
 define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 ; -O0-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
-; -O0:    and w9, w12, #0xff
-; -O0:    subs w10, w9, w8, uxtb
-; -O0:    csel w13, w12, w8, ls
-; -O0:    ldaxrb w8, [x11]
-; -O0:    cmp w8, w12, uxtb
-; -O0:    stlxrb w10, w13, [x11]
-; -O0:    subs w9, w8, w9
+; -O0:    and w9, w11, #0xff
+; -O0:    cmp w9, w8, uxtb
+; -O0:    csel w12, w11, w8, ls
+; -O0:    ldaxrb w8, [x10]
+; -O0:    cmp w8, w11, uxtb
+; -O0:    stlxrb wzr, w12, [x10]
+; -O0:    cmp w8, w9
 ;
 ; -O1-LABEL: atomicrmw_umin_i8_unaligned_seq_cst:
 ; -O1:    and w9, w1, #0xff
@@ -8516,7 +8516,7 @@ define dso_local i8 @atomicrmw_umin_i8_unaligned_seq_cst(ptr %ptr, i8 %value) {
 define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8532,7 +8532,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_monotonic(ptr %ptr, i16 %valu
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8548,7 +8548,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acquire(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8564,7 +8564,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_release(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8580,7 +8580,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_acq_rel(ptr %ptr, i16 %value)
 define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value) {
 ; -O0-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O0:    and w10, w9, #0xffff
-; -O0:    subs w10, w10, w8, uxth
+; -O0:    cmp w10, w8, uxth
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8595,7 +8595,7 @@ define dso_local i16 @atomicrmw_umin_i16_unaligned_seq_cst(ptr %ptr, i16 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8609,7 +8609,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_monotonic(ptr %ptr, i32 %valu
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8623,7 +8623,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acquire(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8637,7 +8637,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_release(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8651,7 +8651,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_acq_rel(ptr %ptr, i32 %value)
 
 define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value) {
 ; -O0-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O0:    subs w10, w9, w8
+; -O0:    cmp w9, w8
 ; -O0:    csel w8, w9, w8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8665,7 +8665,7 @@ define dso_local i32 @atomicrmw_umin_i32_unaligned_seq_cst(ptr %ptr, i32 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8679,7 +8679,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_monotonic(ptr %ptr, i64 %valu
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8693,7 +8693,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acquire(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8707,7 +8707,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_release(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8721,7 +8721,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_acq_rel(ptr %ptr, i64 %value)
 
 define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value) {
 ; -O0-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O0:    subs x10, x9, x8
+; -O0:    cmp x9, x8
 ; -O0:    csel x8, x9, x8, ls
 ; -O0:    bl __atomic_compare_exchange
 ;
@@ -8735,7 +8735,7 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -8752,7 +8752,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -8769,7 +8769,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -8786,7 +8786,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
@@ -8803,7 +8803,7 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
-; -O0:    subs x12, x9, x10
+; -O0:    cmp x9, x10
 ; -O0:    csel x8, x11, x8, hs
 ; -O0:    csel x9, x10, x9, hs
 ; -O0:    bl __atomic_compare_exchange
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a_fp.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a_fp.ll
index fc3ed5680b23a..21cdb685a6f41 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a_fp.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a_fp.ll
@@ -5,10 +5,10 @@
 
 define dso_local half @atomicrmw_fadd_half_aligned_monotonic(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fadd_half_aligned_monotonic:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_half_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -19,10 +19,10 @@ define dso_local half @atomicrmw_fadd_half_aligned_monotonic(ptr %ptr, half %val
 
 define dso_local half @atomicrmw_fadd_half_aligned_acquire(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fadd_half_aligned_acquire:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_half_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -33,10 +33,10 @@ define dso_local half @atomicrmw_fadd_half_aligned_acquire(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fadd_half_aligned_release(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fadd_half_aligned_release:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_half_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -47,10 +47,10 @@ define dso_local half @atomicrmw_fadd_half_aligned_release(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fadd_half_aligned_acq_rel(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fadd_half_aligned_acq_rel:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_half_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -61,10 +61,10 @@ define dso_local half @atomicrmw_fadd_half_aligned_acq_rel(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fadd_half_aligned_seq_cst(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fadd_half_aligned_seq_cst:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_half_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -77,10 +77,10 @@ define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_monotonic(ptr %ptr, bfloa
 ; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_monotonic:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -95,10 +95,10 @@ define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_acquire(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_acquire:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -113,10 +113,10 @@ define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_release(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_release:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -131,10 +131,10 @@ define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_acq_rel(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_acq_rel:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -149,10 +149,10 @@ define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fadd_bfloat_aligned_seq_cst:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fadd_bfloat_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -165,10 +165,10 @@ define dso_local bfloat @atomicrmw_fadd_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 
 define dso_local float @atomicrmw_fadd_float_aligned_monotonic(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fadd_float_aligned_monotonic:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fadd_float_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -179,10 +179,10 @@ define dso_local float @atomicrmw_fadd_float_aligned_monotonic(ptr %ptr, float %
 
 define dso_local float @atomicrmw_fadd_float_aligned_acquire(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fadd_float_aligned_acquire:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fadd_float_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -193,10 +193,10 @@ define dso_local float @atomicrmw_fadd_float_aligned_acquire(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fadd_float_aligned_release(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fadd_float_aligned_release:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fadd_float_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -207,10 +207,10 @@ define dso_local float @atomicrmw_fadd_float_aligned_release(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fadd_float_aligned_acq_rel(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fadd_float_aligned_acq_rel:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fadd_float_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -221,10 +221,10 @@ define dso_local float @atomicrmw_fadd_float_aligned_acq_rel(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fadd_float_aligned_seq_cst(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fadd_float_aligned_seq_cst:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fadd_float_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -235,10 +235,10 @@ define dso_local float @atomicrmw_fadd_float_aligned_seq_cst(ptr %ptr, float %va
 
 define dso_local double @atomicrmw_fadd_double_aligned_monotonic(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fadd_double_aligned_monotonic:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fadd_double_aligned_monotonic:
 ; -O1:    ldxr x8, [x0]
@@ -249,10 +249,10 @@ define dso_local double @atomicrmw_fadd_double_aligned_monotonic(ptr %ptr, doubl
 
 define dso_local double @atomicrmw_fadd_double_aligned_acquire(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fadd_double_aligned_acquire:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fadd_double_aligned_acquire:
 ; -O1:    ldaxr x8, [x0]
@@ -263,10 +263,10 @@ define dso_local double @atomicrmw_fadd_double_aligned_acquire(ptr %ptr, double
 
 define dso_local double @atomicrmw_fadd_double_aligned_release(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fadd_double_aligned_release:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fadd_double_aligned_release:
 ; -O1:    ldxr x8, [x0]
@@ -277,10 +277,10 @@ define dso_local double @atomicrmw_fadd_double_aligned_release(ptr %ptr, double
 
 define dso_local double @atomicrmw_fadd_double_aligned_acq_rel(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fadd_double_aligned_acq_rel:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fadd_double_aligned_acq_rel:
 ; -O1:    ldaxr x8, [x0]
@@ -291,10 +291,10 @@ define dso_local double @atomicrmw_fadd_double_aligned_acq_rel(ptr %ptr, double
 
 define dso_local double @atomicrmw_fadd_double_aligned_seq_cst(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fadd_double_aligned_seq_cst:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fadd_double_aligned_seq_cst:
 ; -O1:    ldaxr x8, [x0]
@@ -480,10 +480,10 @@ define dso_local double @atomicrmw_fadd_double_unaligned_seq_cst(ptr %ptr, doubl
 
 define dso_local half @atomicrmw_fsub_half_aligned_monotonic(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_monotonic:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -494,10 +494,10 @@ define dso_local half @atomicrmw_fsub_half_aligned_monotonic(ptr %ptr, half %val
 
 define dso_local half @atomicrmw_fsub_half_aligned_acquire(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_acquire:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -508,10 +508,10 @@ define dso_local half @atomicrmw_fsub_half_aligned_acquire(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fsub_half_aligned_release(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_release:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -522,10 +522,10 @@ define dso_local half @atomicrmw_fsub_half_aligned_release(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fsub_half_aligned_acq_rel(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_acq_rel:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -536,10 +536,10 @@ define dso_local half @atomicrmw_fsub_half_aligned_acq_rel(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fsub_half_aligned_seq_cst(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fsub_half_aligned_seq_cst:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_half_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -552,10 +552,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_monotonic(ptr %ptr, bfloa
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_monotonic:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -570,10 +570,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_acquire(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_acquire:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -588,10 +588,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_release(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_release:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -606,10 +606,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_acq_rel(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_acq_rel:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -624,10 +624,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fsub_bfloat_aligned_seq_cst:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fsub_bfloat_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -640,10 +640,10 @@ define dso_local bfloat @atomicrmw_fsub_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 
 define dso_local float @atomicrmw_fsub_float_aligned_monotonic(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_monotonic:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -654,10 +654,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_monotonic(ptr %ptr, float %
 
 define dso_local float @atomicrmw_fsub_float_aligned_acquire(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_acquire:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -668,10 +668,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_acquire(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fsub_float_aligned_release(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_release:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -682,10 +682,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_release(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fsub_float_aligned_acq_rel(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_acq_rel:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -696,10 +696,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_acq_rel(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fsub_float_aligned_seq_cst(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fsub_float_aligned_seq_cst:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fsub_float_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -710,10 +710,10 @@ define dso_local float @atomicrmw_fsub_float_aligned_seq_cst(ptr %ptr, float %va
 
 define dso_local double @atomicrmw_fsub_double_aligned_monotonic(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_monotonic:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_monotonic:
 ; -O1:    ldxr x8, [x0]
@@ -724,10 +724,10 @@ define dso_local double @atomicrmw_fsub_double_aligned_monotonic(ptr %ptr, doubl
 
 define dso_local double @atomicrmw_fsub_double_aligned_acquire(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_acquire:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_acquire:
 ; -O1:    ldaxr x8, [x0]
@@ -738,10 +738,10 @@ define dso_local double @atomicrmw_fsub_double_aligned_acquire(ptr %ptr, double
 
 define dso_local double @atomicrmw_fsub_double_aligned_release(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_release:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_release:
 ; -O1:    ldxr x8, [x0]
@@ -752,10 +752,10 @@ define dso_local double @atomicrmw_fsub_double_aligned_release(ptr %ptr, double
 
 define dso_local double @atomicrmw_fsub_double_aligned_acq_rel(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_acq_rel:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_acq_rel:
 ; -O1:    ldaxr x8, [x0]
@@ -766,10 +766,10 @@ define dso_local double @atomicrmw_fsub_double_aligned_acq_rel(ptr %ptr, double
 
 define dso_local double @atomicrmw_fsub_double_aligned_seq_cst(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fsub_double_aligned_seq_cst:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fsub_double_aligned_seq_cst:
 ; -O1:    ldaxr x8, [x0]
@@ -955,10 +955,10 @@ define dso_local double @atomicrmw_fsub_double_unaligned_seq_cst(ptr %ptr, doubl
 
 define dso_local half @atomicrmw_fmax_half_aligned_monotonic(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmax_half_aligned_monotonic:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_half_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -969,10 +969,10 @@ define dso_local half @atomicrmw_fmax_half_aligned_monotonic(ptr %ptr, half %val
 
 define dso_local half @atomicrmw_fmax_half_aligned_acquire(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmax_half_aligned_acquire:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_half_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -983,10 +983,10 @@ define dso_local half @atomicrmw_fmax_half_aligned_acquire(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fmax_half_aligned_release(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmax_half_aligned_release:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_half_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -997,10 +997,10 @@ define dso_local half @atomicrmw_fmax_half_aligned_release(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fmax_half_aligned_acq_rel(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmax_half_aligned_acq_rel:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_half_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -1011,10 +1011,10 @@ define dso_local half @atomicrmw_fmax_half_aligned_acq_rel(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fmax_half_aligned_seq_cst(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmax_half_aligned_seq_cst:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_half_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -1027,10 +1027,10 @@ define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_monotonic(ptr %ptr, bfloa
 ; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_monotonic:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -1045,10 +1045,10 @@ define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_acquire(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_acquire:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -1063,10 +1063,10 @@ define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_release(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_release:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -1081,10 +1081,10 @@ define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_acq_rel(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_acq_rel:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -1099,10 +1099,10 @@ define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fmax_bfloat_aligned_seq_cst:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmax_bfloat_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -1115,10 +1115,10 @@ define dso_local bfloat @atomicrmw_fmax_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 
 define dso_local float @atomicrmw_fmax_float_aligned_monotonic(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmax_float_aligned_monotonic:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fmax_float_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -1129,10 +1129,10 @@ define dso_local float @atomicrmw_fmax_float_aligned_monotonic(ptr %ptr, float %
 
 define dso_local float @atomicrmw_fmax_float_aligned_acquire(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmax_float_aligned_acquire:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fmax_float_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -1143,10 +1143,10 @@ define dso_local float @atomicrmw_fmax_float_aligned_acquire(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fmax_float_aligned_release(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmax_float_aligned_release:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fmax_float_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -1157,10 +1157,10 @@ define dso_local float @atomicrmw_fmax_float_aligned_release(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fmax_float_aligned_acq_rel(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmax_float_aligned_acq_rel:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fmax_float_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -1171,10 +1171,10 @@ define dso_local float @atomicrmw_fmax_float_aligned_acq_rel(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fmax_float_aligned_seq_cst(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmax_float_aligned_seq_cst:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fmax_float_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -1185,10 +1185,10 @@ define dso_local float @atomicrmw_fmax_float_aligned_seq_cst(ptr %ptr, float %va
 
 define dso_local double @atomicrmw_fmax_double_aligned_monotonic(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmax_double_aligned_monotonic:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fmax_double_aligned_monotonic:
 ; -O1:    ldxr x8, [x0]
@@ -1199,10 +1199,10 @@ define dso_local double @atomicrmw_fmax_double_aligned_monotonic(ptr %ptr, doubl
 
 define dso_local double @atomicrmw_fmax_double_aligned_acquire(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmax_double_aligned_acquire:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fmax_double_aligned_acquire:
 ; -O1:    ldaxr x8, [x0]
@@ -1213,10 +1213,10 @@ define dso_local double @atomicrmw_fmax_double_aligned_acquire(ptr %ptr, double
 
 define dso_local double @atomicrmw_fmax_double_aligned_release(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmax_double_aligned_release:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fmax_double_aligned_release:
 ; -O1:    ldxr x8, [x0]
@@ -1227,10 +1227,10 @@ define dso_local double @atomicrmw_fmax_double_aligned_release(ptr %ptr, double
 
 define dso_local double @atomicrmw_fmax_double_aligned_acq_rel(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmax_double_aligned_acq_rel:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fmax_double_aligned_acq_rel:
 ; -O1:    ldaxr x8, [x0]
@@ -1241,10 +1241,10 @@ define dso_local double @atomicrmw_fmax_double_aligned_acq_rel(ptr %ptr, double
 
 define dso_local double @atomicrmw_fmax_double_aligned_seq_cst(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmax_double_aligned_seq_cst:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fmax_double_aligned_seq_cst:
 ; -O1:    ldaxr x8, [x0]
@@ -1430,10 +1430,10 @@ define dso_local double @atomicrmw_fmax_double_unaligned_seq_cst(ptr %ptr, doubl
 
 define dso_local half @atomicrmw_fmin_half_aligned_monotonic(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmin_half_aligned_monotonic:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_half_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -1444,10 +1444,10 @@ define dso_local half @atomicrmw_fmin_half_aligned_monotonic(ptr %ptr, half %val
 
 define dso_local half @atomicrmw_fmin_half_aligned_acquire(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmin_half_aligned_acquire:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_half_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -1458,10 +1458,10 @@ define dso_local half @atomicrmw_fmin_half_aligned_acquire(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fmin_half_aligned_release(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmin_half_aligned_release:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_half_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -1472,10 +1472,10 @@ define dso_local half @atomicrmw_fmin_half_aligned_release(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fmin_half_aligned_acq_rel(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmin_half_aligned_acq_rel:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_half_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -1486,10 +1486,10 @@ define dso_local half @atomicrmw_fmin_half_aligned_acq_rel(ptr %ptr, half %value
 
 define dso_local half @atomicrmw_fmin_half_aligned_seq_cst(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmin_half_aligned_seq_cst:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_half_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -1502,10 +1502,10 @@ define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_monotonic(ptr %ptr, bfloa
 ; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_monotonic:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -1520,10 +1520,10 @@ define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_acquire(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_acquire:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -1538,10 +1538,10 @@ define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_release(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_release:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -1556,10 +1556,10 @@ define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_acq_rel(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_acq_rel:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -1574,10 +1574,10 @@ define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 ; -O0-LABEL: atomicrmw_fmin_bfloat_aligned_seq_cst:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmin_bfloat_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -1590,10 +1590,10 @@ define dso_local bfloat @atomicrmw_fmin_bfloat_aligned_seq_cst(ptr %ptr, bfloat
 
 define dso_local float @atomicrmw_fmin_float_aligned_monotonic(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmin_float_aligned_monotonic:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fmin_float_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -1604,10 +1604,10 @@ define dso_local float @atomicrmw_fmin_float_aligned_monotonic(ptr %ptr, float %
 
 define dso_local float @atomicrmw_fmin_float_aligned_acquire(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmin_float_aligned_acquire:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fmin_float_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -1618,10 +1618,10 @@ define dso_local float @atomicrmw_fmin_float_aligned_acquire(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fmin_float_aligned_release(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmin_float_aligned_release:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fmin_float_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -1632,10 +1632,10 @@ define dso_local float @atomicrmw_fmin_float_aligned_release(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fmin_float_aligned_acq_rel(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmin_float_aligned_acq_rel:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fmin_float_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -1646,10 +1646,10 @@ define dso_local float @atomicrmw_fmin_float_aligned_acq_rel(ptr %ptr, float %va
 
 define dso_local float @atomicrmw_fmin_float_aligned_seq_cst(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmin_float_aligned_seq_cst:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fmin_float_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -1660,10 +1660,10 @@ define dso_local float @atomicrmw_fmin_float_aligned_seq_cst(ptr %ptr, float %va
 
 define dso_local double @atomicrmw_fmin_double_aligned_monotonic(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmin_double_aligned_monotonic:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fmin_double_aligned_monotonic:
 ; -O1:    ldxr x8, [x0]
@@ -1674,10 +1674,10 @@ define dso_local double @atomicrmw_fmin_double_aligned_monotonic(ptr %ptr, doubl
 
 define dso_local double @atomicrmw_fmin_double_aligned_acquire(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmin_double_aligned_acquire:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fmin_double_aligned_acquire:
 ; -O1:    ldaxr x8, [x0]
@@ -1688,10 +1688,10 @@ define dso_local double @atomicrmw_fmin_double_aligned_acquire(ptr %ptr, double
 
 define dso_local double @atomicrmw_fmin_double_aligned_release(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmin_double_aligned_release:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fmin_double_aligned_release:
 ; -O1:    ldxr x8, [x0]
@@ -1702,10 +1702,10 @@ define dso_local double @atomicrmw_fmin_double_aligned_release(ptr %ptr, double
 
 define dso_local double @atomicrmw_fmin_double_aligned_acq_rel(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmin_double_aligned_acq_rel:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fmin_double_aligned_acq_rel:
 ; -O1:    ldaxr x8, [x0]
@@ -1716,10 +1716,10 @@ define dso_local double @atomicrmw_fmin_double_aligned_acq_rel(ptr %ptr, double
 
 define dso_local double @atomicrmw_fmin_double_aligned_seq_cst(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmin_double_aligned_seq_cst:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fmin_double_aligned_seq_cst:
 ; -O1:    ldaxr x8, [x0]
@@ -1905,10 +1905,10 @@ define dso_local double @atomicrmw_fmin_double_unaligned_seq_cst(ptr %ptr, doubl
 
 define dso_local half @atomicrmw_fmaximum_half_aligned_monotonic(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_half_aligned_monotonic:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_half_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -1919,10 +1919,10 @@ define dso_local half @atomicrmw_fmaximum_half_aligned_monotonic(ptr %ptr, half
 
 define dso_local half @atomicrmw_fmaximum_half_aligned_acquire(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_half_aligned_acquire:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_half_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -1933,10 +1933,10 @@ define dso_local half @atomicrmw_fmaximum_half_aligned_acquire(ptr %ptr, half %v
 
 define dso_local half @atomicrmw_fmaximum_half_aligned_release(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_half_aligned_release:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_half_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -1947,10 +1947,10 @@ define dso_local half @atomicrmw_fmaximum_half_aligned_release(ptr %ptr, half %v
 
 define dso_local half @atomicrmw_fmaximum_half_aligned_acq_rel(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_half_aligned_acq_rel:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_half_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -1961,10 +1961,10 @@ define dso_local half @atomicrmw_fmaximum_half_aligned_acq_rel(ptr %ptr, half %v
 
 define dso_local half @atomicrmw_fmaximum_half_aligned_seq_cst(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_half_aligned_seq_cst:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_half_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -1977,10 +1977,10 @@ define dso_local bfloat @atomicrmw_fmaximum_bfloat_aligned_monotonic(ptr %ptr, b
 ; -O0-LABEL: atomicrmw_fmaximum_bfloat_aligned_monotonic:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_bfloat_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -1995,10 +1995,10 @@ define dso_local bfloat @atomicrmw_fmaximum_bfloat_aligned_acquire(ptr %ptr, bfl
 ; -O0-LABEL: atomicrmw_fmaximum_bfloat_aligned_acquire:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_bfloat_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -2013,10 +2013,10 @@ define dso_local bfloat @atomicrmw_fmaximum_bfloat_aligned_release(ptr %ptr, bfl
 ; -O0-LABEL: atomicrmw_fmaximum_bfloat_aligned_release:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_bfloat_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -2031,10 +2031,10 @@ define dso_local bfloat @atomicrmw_fmaximum_bfloat_aligned_acq_rel(ptr %ptr, bfl
 ; -O0-LABEL: atomicrmw_fmaximum_bfloat_aligned_acq_rel:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_bfloat_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -2049,10 +2049,10 @@ define dso_local bfloat @atomicrmw_fmaximum_bfloat_aligned_seq_cst(ptr %ptr, bfl
 ; -O0-LABEL: atomicrmw_fmaximum_bfloat_aligned_seq_cst:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_bfloat_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -2065,10 +2065,10 @@ define dso_local bfloat @atomicrmw_fmaximum_bfloat_aligned_seq_cst(ptr %ptr, bfl
 
 define dso_local float @atomicrmw_fmaximum_float_aligned_monotonic(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_float_aligned_monotonic:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_float_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -2079,10 +2079,10 @@ define dso_local float @atomicrmw_fmaximum_float_aligned_monotonic(ptr %ptr, flo
 
 define dso_local float @atomicrmw_fmaximum_float_aligned_acquire(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_float_aligned_acquire:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_float_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -2093,10 +2093,10 @@ define dso_local float @atomicrmw_fmaximum_float_aligned_acquire(ptr %ptr, float
 
 define dso_local float @atomicrmw_fmaximum_float_aligned_release(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_float_aligned_release:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_float_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -2107,10 +2107,10 @@ define dso_local float @atomicrmw_fmaximum_float_aligned_release(ptr %ptr, float
 
 define dso_local float @atomicrmw_fmaximum_float_aligned_acq_rel(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_float_aligned_acq_rel:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_float_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -2121,10 +2121,10 @@ define dso_local float @atomicrmw_fmaximum_float_aligned_acq_rel(ptr %ptr, float
 
 define dso_local float @atomicrmw_fmaximum_float_aligned_seq_cst(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_float_aligned_seq_cst:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_float_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -2135,10 +2135,10 @@ define dso_local float @atomicrmw_fmaximum_float_aligned_seq_cst(ptr %ptr, float
 
 define dso_local double @atomicrmw_fmaximum_double_aligned_monotonic(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_double_aligned_monotonic:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_double_aligned_monotonic:
 ; -O1:    ldxr x8, [x0]
@@ -2149,10 +2149,10 @@ define dso_local double @atomicrmw_fmaximum_double_aligned_monotonic(ptr %ptr, d
 
 define dso_local double @atomicrmw_fmaximum_double_aligned_acquire(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_double_aligned_acquire:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_double_aligned_acquire:
 ; -O1:    ldaxr x8, [x0]
@@ -2163,10 +2163,10 @@ define dso_local double @atomicrmw_fmaximum_double_aligned_acquire(ptr %ptr, dou
 
 define dso_local double @atomicrmw_fmaximum_double_aligned_release(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_double_aligned_release:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_double_aligned_release:
 ; -O1:    ldxr x8, [x0]
@@ -2177,10 +2177,10 @@ define dso_local double @atomicrmw_fmaximum_double_aligned_release(ptr %ptr, dou
 
 define dso_local double @atomicrmw_fmaximum_double_aligned_acq_rel(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_double_aligned_acq_rel:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_double_aligned_acq_rel:
 ; -O1:    ldaxr x8, [x0]
@@ -2191,10 +2191,10 @@ define dso_local double @atomicrmw_fmaximum_double_aligned_acq_rel(ptr %ptr, dou
 
 define dso_local double @atomicrmw_fmaximum_double_aligned_seq_cst(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fmaximum_double_aligned_seq_cst:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fmaximum_double_aligned_seq_cst:
 ; -O1:    ldaxr x8, [x0]
@@ -2380,10 +2380,10 @@ define dso_local double @atomicrmw_fmaximum_double_unaligned_seq_cst(ptr %ptr, d
 
 define dso_local half @atomicrmw_fminimum_half_aligned_monotonic(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fminimum_half_aligned_monotonic:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_half_aligned_monotonic:
 ; -O1:    ldxrh w8, [x0]
@@ -2394,10 +2394,10 @@ define dso_local half @atomicrmw_fminimum_half_aligned_monotonic(ptr %ptr, half
 
 define dso_local half @atomicrmw_fminimum_half_aligned_acquire(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fminimum_half_aligned_acquire:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_half_aligned_acquire:
 ; -O1:    ldaxrh w8, [x0]
@@ -2408,10 +2408,10 @@ define dso_local half @atomicrmw_fminimum_half_aligned_acquire(ptr %ptr, half %v
 
 define dso_local half @atomicrmw_fminimum_half_aligned_release(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fminimum_half_aligned_release:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_half_aligned_release:
 ; -O1:    ldxrh w8, [x0]
@@ -2422,10 +2422,10 @@ define dso_local half @atomicrmw_fminimum_half_aligned_release(ptr %ptr, half %v
 
 define dso_local half @atomicrmw_fminimum_half_aligned_acq_rel(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fminimum_half_aligned_acq_rel:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_half_aligned_acq_rel:
 ; -O1:    ldaxrh w8, [x0]
@@ -2436,10 +2436,10 @@ define dso_local half @atomicrmw_fminimum_half_aligned_acq_rel(ptr %ptr, half %v
 
 define dso_local half @atomicrmw_fminimum_half_aligned_seq_cst(ptr %ptr, half %value) {
 ; -O0-LABEL: atomicrmw_fminimum_half_aligned_seq_cst:
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_half_aligned_seq_cst:
 ; -O1:    ldaxrh w8, [x0]
@@ -2452,10 +2452,10 @@ define dso_local bfloat @atomicrmw_fminimum_bfloat_aligned_monotonic(ptr %ptr, b
 ; -O0-LABEL: atomicrmw_fminimum_bfloat_aligned_monotonic:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_bfloat_aligned_monotonic:
 ; -O1:    ldxrh w9, [x0]
@@ -2470,10 +2470,10 @@ define dso_local bfloat @atomicrmw_fminimum_bfloat_aligned_acquire(ptr %ptr, bfl
 ; -O0-LABEL: atomicrmw_fminimum_bfloat_aligned_acquire:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_bfloat_aligned_acquire:
 ; -O1:    ldaxrh w9, [x0]
@@ -2488,10 +2488,10 @@ define dso_local bfloat @atomicrmw_fminimum_bfloat_aligned_release(ptr %ptr, bfl
 ; -O0-LABEL: atomicrmw_fminimum_bfloat_aligned_release:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_bfloat_aligned_release:
 ; -O1:    ldxrh w9, [x0]
@@ -2506,10 +2506,10 @@ define dso_local bfloat @atomicrmw_fminimum_bfloat_aligned_acq_rel(ptr %ptr, bfl
 ; -O0-LABEL: atomicrmw_fminimum_bfloat_aligned_acq_rel:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_bfloat_aligned_acq_rel:
 ; -O1:    ldaxrh w9, [x0]
@@ -2524,10 +2524,10 @@ define dso_local bfloat @atomicrmw_fminimum_bfloat_aligned_seq_cst(ptr %ptr, bfl
 ; -O0-LABEL: atomicrmw_fminimum_bfloat_aligned_seq_cst:
 ; -O0:    add w8, w8, w9
 ; -O0:    add w8, w8, w9
-; -O0:    ldaxrh w8, [x11]
+; -O0:    ldaxrh w8, [x10]
+; -O0:    cmp w8, w9, uxth
+; -O0:    stlxrh wzr, w11, [x10]
 ; -O0:    cmp w8, w9, uxth
-; -O0:    stlxrh w10, w12, [x11]
-; -O0:    subs w9, w8, w9, uxth
 ;
 ; -O1-LABEL: atomicrmw_fminimum_bfloat_aligned_seq_cst:
 ; -O1:    ldaxrh w9, [x0]
@@ -2540,10 +2540,10 @@ define dso_local bfloat @atomicrmw_fminimum_bfloat_aligned_seq_cst(ptr %ptr, bfl
 
 define dso_local float @atomicrmw_fminimum_float_aligned_monotonic(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fminimum_float_aligned_monotonic:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fminimum_float_aligned_monotonic:
 ; -O1:    ldxr w8, [x0]
@@ -2554,10 +2554,10 @@ define dso_local float @atomicrmw_fminimum_float_aligned_monotonic(ptr %ptr, flo
 
 define dso_local float @atomicrmw_fminimum_float_aligned_acquire(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fminimum_float_aligned_acquire:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fminimum_float_aligned_acquire:
 ; -O1:    ldaxr w8, [x0]
@@ -2568,10 +2568,10 @@ define dso_local float @atomicrmw_fminimum_float_aligned_acquire(ptr %ptr, float
 
 define dso_local float @atomicrmw_fminimum_float_aligned_release(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fminimum_float_aligned_release:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fminimum_float_aligned_release:
 ; -O1:    ldxr w8, [x0]
@@ -2582,10 +2582,10 @@ define dso_local float @atomicrmw_fminimum_float_aligned_release(ptr %ptr, float
 
 define dso_local float @atomicrmw_fminimum_float_aligned_acq_rel(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fminimum_float_aligned_acq_rel:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fminimum_float_aligned_acq_rel:
 ; -O1:    ldaxr w8, [x0]
@@ -2596,10 +2596,10 @@ define dso_local float @atomicrmw_fminimum_float_aligned_acq_rel(ptr %ptr, float
 
 define dso_local float @atomicrmw_fminimum_float_aligned_seq_cst(ptr %ptr, float %value) {
 ; -O0-LABEL: atomicrmw_fminimum_float_aligned_seq_cst:
-; -O0:    ldaxr w8, [x11]
+; -O0:    ldaxr w8, [x10]
+; -O0:    cmp w8, w9
+; -O0:    stlxr wzr, w11, [x10]
 ; -O0:    cmp w8, w9
-; -O0:    stlxr w10, w12, [x11]
-; -O0:    subs w9, w8, w9
 ;
 ; -O1-LABEL: atomicrmw_fminimum_float_aligned_seq_cst:
 ; -O1:    ldaxr w8, [x0]
@@ -2610,10 +2610,10 @@ define dso_local float @atomicrmw_fminimum_float_aligned_seq_cst(ptr %ptr, float
 
 define dso_local double @atomicrmw_fminimum_double_aligned_monotonic(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fminimum_double_aligned_monotonic:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fminimum_double_aligned_monotonic:
 ; -O1:    ldxr x8, [x0]
@@ -2624,10 +2624,10 @@ define dso_local double @atomicrmw_fminimum_double_aligned_monotonic(ptr %ptr, d
 
 define dso_local double @atomicrmw_fminimum_double_aligned_acquire(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fminimum_double_aligned_acquire:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fminimum_double_aligned_acquire:
 ; -O1:    ldaxr x8, [x0]
@@ -2638,10 +2638,10 @@ define dso_local double @atomicrmw_fminimum_double_aligned_acquire(ptr %ptr, dou
 
 define dso_local double @atomicrmw_fminimum_double_aligned_release(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fminimum_double_aligned_release:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fminimum_double_aligned_release:
 ; -O1:    ldxr x8, [x0]
@@ -2652,10 +2652,10 @@ define dso_local double @atomicrmw_fminimum_double_aligned_release(ptr %ptr, dou
 
 define dso_local double @atomicrmw_fminimum_double_aligned_acq_rel(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fminimum_double_aligned_acq_rel:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fminimum_double_aligned_acq_rel:
 ; -O1:    ldaxr x8, [x0]
@@ -2666,10 +2666,10 @@ define dso_local double @atomicrmw_fminimum_double_aligned_acq_rel(ptr %ptr, dou
 
 define dso_local double @atomicrmw_fminimum_double_aligned_seq_cst(ptr %ptr, double %value) {
 ; -O0-LABEL: atomicrmw_fminimum_double_aligned_seq_cst:
-; -O0:    ldaxr x8, [x11]
+; -O0:    ldaxr x8, [x10]
+; -O0:    cmp x8, x9
+; -O0:    stlxr wzr, x11, [x10]
 ; -O0:    cmp x8, x9
-; -O0:    stlxr w10, x12, [x11]
-; -O0:    subs x9, x8, x9
 ;
 ; -O1-LABEL: atomicrmw_fminimum_double_aligned_seq_cst:
 ; -O1:    ldaxr x8, [x0]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-lse2.ll
index fa4672ec5acca..5d6e20c29a3db 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-lse2.ll
@@ -6,8 +6,8 @@
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -22,8 +22,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -37,8 +37,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -53,8 +53,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -68,8 +68,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -84,8 +84,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -99,8 +99,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -115,8 +115,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -130,8 +130,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -146,8 +146,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -161,8 +161,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -177,8 +177,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -192,8 +192,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -208,8 +208,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -223,8 +223,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -239,8 +239,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -254,8 +254,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -270,8 +270,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -285,8 +285,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -301,8 +301,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -316,8 +316,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -332,8 +332,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -347,8 +347,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -363,8 +363,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -378,8 +378,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -394,8 +394,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -409,8 +409,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -425,8 +425,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -440,8 +440,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -456,8 +456,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -471,8 +471,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -487,8 +487,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxrh w8, [x2]
@@ -502,8 +502,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -518,8 +518,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -533,8 +533,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -549,8 +549,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -564,8 +564,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -580,8 +580,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -595,8 +595,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -611,8 +611,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -626,8 +626,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -642,8 +642,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -657,8 +657,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -673,8 +673,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
 ; -O1:    ldxrh w8, [x2]
@@ -688,8 +688,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -704,8 +704,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -719,8 +719,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -735,8 +735,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -750,8 +750,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -766,8 +766,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -781,8 +781,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -797,8 +797,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -812,8 +812,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -828,8 +828,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -843,8 +843,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -859,8 +859,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -874,8 +874,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -890,8 +890,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -905,8 +905,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -921,8 +921,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -936,8 +936,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i1
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
 ; -O1:    ldxr w0, [x2]
@@ -951,8 +951,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxr w8, [x2]
@@ -966,8 +966,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -981,8 +981,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -996,8 +996,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1011,8 +1011,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1026,8 +1026,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1041,8 +1041,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1056,8 +1056,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1071,8 +1071,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1086,8 +1086,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1101,8 +1101,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1116,8 +1116,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic:
 ; -O1:    ldxr w0, [x2]
@@ -1131,8 +1131,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
 ; -O1:    ldxr w8, [x2]
@@ -1146,8 +1146,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1161,8 +1161,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1176,8 +1176,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1191,8 +1191,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1206,8 +1206,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1221,8 +1221,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1236,8 +1236,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1251,8 +1251,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1266,8 +1266,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1281,8 +1281,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1296,8 +1296,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1311,8 +1311,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1326,8 +1326,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1341,8 +1341,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1356,8 +1356,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1371,8 +1371,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1386,8 +1386,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i3
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
 ; -O1:    ldxr x0, [x2]
@@ -1401,8 +1401,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxr x8, [x2]
@@ -1416,8 +1416,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1431,8 +1431,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1446,8 +1446,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1461,8 +1461,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1476,8 +1476,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1491,8 +1491,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1506,8 +1506,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1521,8 +1521,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1536,8 +1536,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1551,8 +1551,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1566,8 +1566,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic:
 ; -O1:    ldxr x0, [x2]
@@ -1581,8 +1581,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
 ; -O1:    ldxr x8, [x2]
@@ -1596,8 +1596,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1611,8 +1611,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1626,8 +1626,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1641,8 +1641,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1656,8 +1656,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1671,8 +1671,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1686,8 +1686,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1701,8 +1701,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1716,8 +1716,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1731,8 +1731,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1746,8 +1746,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1761,8 +1761,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1776,8 +1776,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1791,8 +1791,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1806,8 +1806,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1821,8 +1821,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -2406,8 +2406,8 @@ define dso_local i128 @cmpxchg_i128_aligned_seq_cst_seq_cst_weak(i128 %expected,
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2422,8 +2422,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -2437,8 +2437,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected,
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2453,8 +2453,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2468,8 +2468,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2484,8 +2484,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2499,8 +2499,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2515,8 +2515,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2530,8 +2530,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2546,8 +2546,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2561,8 +2561,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2577,8 +2577,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2592,8 +2592,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2608,8 +2608,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -2623,8 +2623,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2639,8 +2639,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2654,8 +2654,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2670,8 +2670,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2685,8 +2685,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2701,8 +2701,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2716,8 +2716,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2732,8 +2732,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2747,8 +2747,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2763,8 +2763,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2778,8 +2778,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2794,8 +2794,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2809,8 +2809,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2825,8 +2825,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2840,8 +2840,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2856,8 +2856,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc.ll
index 8de79f3d8fb12..175f807a5ed20 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc.ll
@@ -6,8 +6,8 @@
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -22,8 +22,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -37,8 +37,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -53,8 +53,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -68,8 +68,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -84,8 +84,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -99,8 +99,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -115,8 +115,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -130,8 +130,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -146,8 +146,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -161,8 +161,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -177,8 +177,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -192,8 +192,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -208,8 +208,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -223,8 +223,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -239,8 +239,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -254,8 +254,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -270,8 +270,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -285,8 +285,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -301,8 +301,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -316,8 +316,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -332,8 +332,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -347,8 +347,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -363,8 +363,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -378,8 +378,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -394,8 +394,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -409,8 +409,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -425,8 +425,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -440,8 +440,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -456,8 +456,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -471,8 +471,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -487,8 +487,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxrh w8, [x2]
@@ -502,8 +502,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -518,8 +518,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -533,8 +533,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -549,8 +549,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -564,8 +564,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -580,8 +580,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -595,8 +595,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -611,8 +611,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -626,8 +626,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -642,8 +642,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -657,8 +657,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -673,8 +673,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
 ; -O1:    ldxrh w8, [x2]
@@ -688,8 +688,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -704,8 +704,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -719,8 +719,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -735,8 +735,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -750,8 +750,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -766,8 +766,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -781,8 +781,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -797,8 +797,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -812,8 +812,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -828,8 +828,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -843,8 +843,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -859,8 +859,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -874,8 +874,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -890,8 +890,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -905,8 +905,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -921,8 +921,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -936,8 +936,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i1
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
 ; -O1:    ldxr w0, [x2]
@@ -951,8 +951,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxr w8, [x2]
@@ -966,8 +966,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -981,8 +981,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -996,8 +996,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1011,8 +1011,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1026,8 +1026,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1041,8 +1041,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1056,8 +1056,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1071,8 +1071,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1086,8 +1086,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1101,8 +1101,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1116,8 +1116,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic:
 ; -O1:    ldxr w0, [x2]
@@ -1131,8 +1131,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
 ; -O1:    ldxr w8, [x2]
@@ -1146,8 +1146,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1161,8 +1161,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1176,8 +1176,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1191,8 +1191,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1206,8 +1206,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1221,8 +1221,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1236,8 +1236,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1251,8 +1251,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1266,8 +1266,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1281,8 +1281,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1296,8 +1296,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1311,8 +1311,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1326,8 +1326,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1341,8 +1341,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1356,8 +1356,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1371,8 +1371,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1386,8 +1386,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i3
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
 ; -O1:    ldxr x0, [x2]
@@ -1401,8 +1401,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxr x8, [x2]
@@ -1416,8 +1416,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1431,8 +1431,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1446,8 +1446,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1461,8 +1461,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1476,8 +1476,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1491,8 +1491,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1506,8 +1506,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1521,8 +1521,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1536,8 +1536,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1551,8 +1551,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1566,8 +1566,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic:
 ; -O1:    ldxr x0, [x2]
@@ -1581,8 +1581,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
 ; -O1:    ldxr x8, [x2]
@@ -1596,8 +1596,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1611,8 +1611,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1626,8 +1626,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1641,8 +1641,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1656,8 +1656,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1671,8 +1671,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1686,8 +1686,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1701,8 +1701,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1716,8 +1716,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1731,8 +1731,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1746,8 +1746,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1761,8 +1761,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1776,8 +1776,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1791,8 +1791,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1806,8 +1806,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1821,8 +1821,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -2406,8 +2406,8 @@ define dso_local i128 @cmpxchg_i128_aligned_seq_cst_seq_cst_weak(i128 %expected,
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2422,8 +2422,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -2437,8 +2437,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected,
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2453,8 +2453,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2468,8 +2468,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2484,8 +2484,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2499,8 +2499,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2515,8 +2515,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2530,8 +2530,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2546,8 +2546,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2561,8 +2561,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2577,8 +2577,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2592,8 +2592,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2608,8 +2608,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -2623,8 +2623,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2639,8 +2639,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2654,8 +2654,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2670,8 +2670,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2685,8 +2685,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2701,8 +2701,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2716,8 +2716,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2732,8 +2732,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2747,8 +2747,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2763,8 +2763,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2778,8 +2778,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2794,8 +2794,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2809,8 +2809,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2825,8 +2825,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2840,8 +2840,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2856,8 +2856,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc3.ll
index 3c3b18c69f814..5682832440117 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc3.ll
@@ -6,8 +6,8 @@
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -22,8 +22,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -37,8 +37,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -53,8 +53,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -68,8 +68,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -84,8 +84,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -99,8 +99,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -115,8 +115,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -130,8 +130,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -146,8 +146,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -161,8 +161,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -177,8 +177,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -192,8 +192,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -208,8 +208,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -223,8 +223,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -239,8 +239,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -254,8 +254,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -270,8 +270,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -285,8 +285,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -301,8 +301,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -316,8 +316,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -332,8 +332,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -347,8 +347,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -363,8 +363,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -378,8 +378,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -394,8 +394,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -409,8 +409,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -425,8 +425,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -440,8 +440,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -456,8 +456,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -471,8 +471,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -487,8 +487,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxrh w8, [x2]
@@ -502,8 +502,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -518,8 +518,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -533,8 +533,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -549,8 +549,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -564,8 +564,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -580,8 +580,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -595,8 +595,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -611,8 +611,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -626,8 +626,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -642,8 +642,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -657,8 +657,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -673,8 +673,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
 ; -O1:    ldxrh w8, [x2]
@@ -688,8 +688,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -704,8 +704,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -719,8 +719,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -735,8 +735,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -750,8 +750,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -766,8 +766,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -781,8 +781,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -797,8 +797,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -812,8 +812,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -828,8 +828,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -843,8 +843,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -859,8 +859,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -874,8 +874,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -890,8 +890,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -905,8 +905,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -921,8 +921,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -936,8 +936,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i1
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
 ; -O1:    ldxr w0, [x2]
@@ -951,8 +951,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxr w8, [x2]
@@ -966,8 +966,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -981,8 +981,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -996,8 +996,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1011,8 +1011,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1026,8 +1026,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1041,8 +1041,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1056,8 +1056,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1071,8 +1071,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1086,8 +1086,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1101,8 +1101,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1116,8 +1116,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic:
 ; -O1:    ldxr w0, [x2]
@@ -1131,8 +1131,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
 ; -O1:    ldxr w8, [x2]
@@ -1146,8 +1146,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1161,8 +1161,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1176,8 +1176,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1191,8 +1191,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1206,8 +1206,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1221,8 +1221,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1236,8 +1236,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1251,8 +1251,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1266,8 +1266,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1281,8 +1281,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1296,8 +1296,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1311,8 +1311,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1326,8 +1326,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1341,8 +1341,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1356,8 +1356,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1371,8 +1371,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1386,8 +1386,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i3
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
 ; -O1:    ldxr x0, [x2]
@@ -1401,8 +1401,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxr x8, [x2]
@@ -1416,8 +1416,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1431,8 +1431,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1446,8 +1446,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1461,8 +1461,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1476,8 +1476,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1491,8 +1491,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1506,8 +1506,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1521,8 +1521,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1536,8 +1536,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1551,8 +1551,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1566,8 +1566,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic:
 ; -O1:    ldxr x0, [x2]
@@ -1581,8 +1581,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
 ; -O1:    ldxr x8, [x2]
@@ -1596,8 +1596,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1611,8 +1611,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1626,8 +1626,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1641,8 +1641,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1656,8 +1656,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1671,8 +1671,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1686,8 +1686,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1701,8 +1701,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1716,8 +1716,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1731,8 +1731,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1746,8 +1746,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1761,8 +1761,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1776,8 +1776,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1791,8 +1791,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1806,8 +1806,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1821,8 +1821,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -2406,8 +2406,8 @@ define dso_local i128 @cmpxchg_i128_aligned_seq_cst_seq_cst_weak(i128 %expected,
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2422,8 +2422,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -2437,8 +2437,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected,
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2453,8 +2453,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2468,8 +2468,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2484,8 +2484,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2499,8 +2499,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2515,8 +2515,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2530,8 +2530,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2546,8 +2546,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2561,8 +2561,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2577,8 +2577,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2592,8 +2592,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2608,8 +2608,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -2623,8 +2623,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2639,8 +2639,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2654,8 +2654,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2670,8 +2670,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2685,8 +2685,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2701,8 +2701,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2716,8 +2716,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2732,8 +2732,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2747,8 +2747,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2763,8 +2763,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2778,8 +2778,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2794,8 +2794,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2809,8 +2809,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2825,8 +2825,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2840,8 +2840,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2856,8 +2856,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-v8a.ll
index 33441815541a4..23de19f103cc9 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-v8a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-v8a.ll
@@ -6,8 +6,8 @@
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -22,8 +22,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -37,8 +37,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -53,8 +53,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -68,8 +68,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -84,8 +84,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -99,8 +99,8 @@ define dso_local i8 @cmpxchg_i8_aligned_monotonic_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -115,8 +115,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -130,8 +130,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -146,8 +146,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -161,8 +161,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -177,8 +177,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -192,8 +192,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acquire_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -208,8 +208,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -223,8 +223,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -239,8 +239,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -254,8 +254,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -270,8 +270,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_release_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -285,8 +285,8 @@ define dso_local i8 @cmpxchg_i8_aligned_release_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -301,8 +301,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -316,8 +316,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -332,8 +332,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -347,8 +347,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -363,8 +363,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -378,8 +378,8 @@ define dso_local i8 @cmpxchg_i8_aligned_acq_rel_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -394,8 +394,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -409,8 +409,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_monotonic_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -425,8 +425,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -440,8 +440,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_acquire_weak(i8 %expected, i8 %n
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -456,8 +456,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst(i8 %expected, i8 %new, p
 define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -471,8 +471,8 @@ define dso_local i8 @cmpxchg_i8_aligned_seq_cst_seq_cst_weak(i8 %expected, i8 %n
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -487,8 +487,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic(i16 %expected, i16
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxrh w8, [x2]
@@ -502,8 +502,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_monotonic_weak(i16 %expected
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -518,8 +518,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -533,8 +533,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_acquire_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -549,8 +549,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -564,8 +564,8 @@ define dso_local i16 @cmpxchg_i16_aligned_monotonic_seq_cst_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -580,8 +580,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -595,8 +595,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -611,8 +611,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -626,8 +626,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -642,8 +642,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -657,8 +657,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acquire_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -673,8 +673,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_monotonic_weak:
 ; -O1:    ldxrh w8, [x2]
@@ -688,8 +688,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -704,8 +704,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -719,8 +719,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -735,8 +735,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_release_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -750,8 +750,8 @@ define dso_local i16 @cmpxchg_i16_aligned_release_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -766,8 +766,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -781,8 +781,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -797,8 +797,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -812,8 +812,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -828,8 +828,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -843,8 +843,8 @@ define dso_local i16 @cmpxchg_i16_aligned_acq_rel_seq_cst_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xffff
@@ -859,8 +859,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic(i16 %expected, i16 %
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -874,8 +874,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_monotonic_weak(i16 %expected,
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xffff
@@ -890,8 +890,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -905,8 +905,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_acquire_weak(i16 %expected, i1
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xffff
@@ -921,8 +921,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst(i16 %expected, i16 %ne
 define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i16 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrh w0, [x2]
-; -O0:    cmp w0, w9, uxth
-; -O0:    stlxrh w8, w1, [x2]
+; -O0:    cmp w0, w8, uxth
+; -O0:    stlxrh wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i16_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrh w8, [x2]
@@ -936,8 +936,8 @@ define dso_local i16 @cmpxchg_i16_aligned_seq_cst_seq_cst_weak(i16 %expected, i1
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic:
 ; -O1:    ldxr w0, [x2]
@@ -951,8 +951,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic(i32 %expected, i32
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxr w8, [x2]
@@ -966,8 +966,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_monotonic_weak(i32 %expected
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -981,8 +981,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -996,8 +996,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_acquire_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1011,8 +1011,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1026,8 +1026,8 @@ define dso_local i32 @cmpxchg_i32_aligned_monotonic_seq_cst_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1041,8 +1041,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1056,8 +1056,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1071,8 +1071,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1086,8 +1086,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1101,8 +1101,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1116,8 +1116,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acquire_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic:
 ; -O1:    ldxr w0, [x2]
@@ -1131,8 +1131,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_monotonic_weak:
 ; -O1:    ldxr w8, [x2]
@@ -1146,8 +1146,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1161,8 +1161,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1176,8 +1176,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1191,8 +1191,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_release_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1206,8 +1206,8 @@ define dso_local i32 @cmpxchg_i32_aligned_release_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1221,8 +1221,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1236,8 +1236,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1251,8 +1251,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1266,8 +1266,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1281,8 +1281,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1296,8 +1296,8 @@ define dso_local i32 @cmpxchg_i32_aligned_acq_rel_seq_cst_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic:
 ; -O1:    ldaxr w0, [x2]
@@ -1311,8 +1311,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic(i32 %expected, i32 %
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1326,8 +1326,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_monotonic_weak(i32 %expected,
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire:
 ; -O1:    ldaxr w0, [x2]
@@ -1341,8 +1341,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1356,8 +1356,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_acquire_weak(i32 %expected, i3
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst:
 ; -O1:    ldaxr w0, [x2]
@@ -1371,8 +1371,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst(i32 %expected, i32 %ne
 define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i32 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxr w0, [x2]
-; -O0:    cmp w0, w9
-; -O0:    stlxr w8, w1, [x2]
+; -O0:    cmp w0, w8
+; -O0:    stlxr wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i32_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxr w8, [x2]
@@ -1386,8 +1386,8 @@ define dso_local i32 @cmpxchg_i32_aligned_seq_cst_seq_cst_weak(i32 %expected, i3
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic:
 ; -O1:    ldxr x0, [x2]
@@ -1401,8 +1401,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic(i64 %expected, i64
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_monotonic_weak:
 ; -O1:    ldxr x8, [x2]
@@ -1416,8 +1416,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_monotonic_weak(i64 %expected
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1431,8 +1431,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1446,8 +1446,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_acquire_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1461,8 +1461,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1476,8 +1476,8 @@ define dso_local i64 @cmpxchg_i64_aligned_monotonic_seq_cst_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1491,8 +1491,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1506,8 +1506,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1521,8 +1521,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1536,8 +1536,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1551,8 +1551,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acquire_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1566,8 +1566,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acquire_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic:
 ; -O1:    ldxr x0, [x2]
@@ -1581,8 +1581,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_monotonic_weak:
 ; -O1:    ldxr x8, [x2]
@@ -1596,8 +1596,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1611,8 +1611,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1626,8 +1626,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1641,8 +1641,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_release_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1656,8 +1656,8 @@ define dso_local i64 @cmpxchg_i64_aligned_release_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1671,8 +1671,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1686,8 +1686,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1701,8 +1701,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1716,8 +1716,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1731,8 +1731,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1746,8 +1746,8 @@ define dso_local i64 @cmpxchg_i64_aligned_acq_rel_seq_cst_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic:
 ; -O1:    ldaxr x0, [x2]
@@ -1761,8 +1761,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic(i64 %expected, i64 %
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1776,8 +1776,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_monotonic_weak(i64 %expected,
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire:
 ; -O1:    ldaxr x0, [x2]
@@ -1791,8 +1791,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_acquire_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -1806,8 +1806,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_acquire_weak(i64 %expected, i6
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst:
 ; -O1:    ldaxr x0, [x2]
@@ -1821,8 +1821,8 @@ define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst(i64 %expected, i64 %ne
 define dso_local i64 @cmpxchg_i64_aligned_seq_cst_seq_cst_weak(i64 %expected, i64 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxr x0, [x2]
-; -O0:    cmp x0, x9
-; -O0:    stlxr w8, x1, [x2]
+; -O0:    cmp x0, x8
+; -O0:    stlxr wzr, x1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i64_aligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxr x8, [x2]
@@ -2406,8 +2406,8 @@ define dso_local i128 @cmpxchg_i128_aligned_seq_cst_seq_cst_weak(i128 %expected,
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2422,8 +2422,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic(i8 %expected, i8 %
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -2437,8 +2437,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_monotonic_weak(i8 %expected,
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2453,8 +2453,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2468,8 +2468,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_acquire_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2484,8 +2484,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_monotonic_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2499,8 +2499,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_monotonic_seq_cst_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2515,8 +2515,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2530,8 +2530,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2546,8 +2546,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2561,8 +2561,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2577,8 +2577,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acquire_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2592,8 +2592,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acquire_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2608,8 +2608,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_monotonic_weak:
 ; -O1:    ldxrb w8, [x2]
@@ -2623,8 +2623,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2639,8 +2639,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2654,8 +2654,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2670,8 +2670,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_release_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2685,8 +2685,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_release_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2701,8 +2701,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2716,8 +2716,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2732,8 +2732,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2747,8 +2747,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2763,8 +2763,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_acq_rel_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2778,8 +2778,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_acq_rel_seq_cst_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic:
 ; -O1:    and w8, w0, #0xff
@@ -2794,8 +2794,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic(i8 %expected, i8 %ne
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_monotonic_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2809,8 +2809,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_monotonic_weak(i8 %expected, i
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire:
 ; -O1:    and w8, w0, #0xff
@@ -2825,8 +2825,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_acquire_weak:
 ; -O1:    ldaxrb w8, [x2]
@@ -2840,8 +2840,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_acquire_weak(i8 %expected, i8
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst:
 ; -O1:    and w8, w0, #0xff
@@ -2856,8 +2856,8 @@ define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst(i8 %expected, i8 %new,
 define dso_local i8 @cmpxchg_i8_unaligned_seq_cst_seq_cst_weak(i8 %expected, i8 %new, ptr %ptr) {
 ; -O0-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
 ; -O0:    ldaxrb w0, [x2]
-; -O0:    cmp w0, w9, uxtb
-; -O0:    stlxrb w8, w1, [x2]
+; -O0:    cmp w0, w8, uxtb
+; -O0:    stlxrb wzr, w1, [x2]
 ;
 ; -O1-LABEL: cmpxchg_i8_unaligned_seq_cst_seq_cst_weak:
 ; -O1:    ldaxrb w8, [x2]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index 5bc041aef88ba..b7096ad6f0410 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -41,14 +41,14 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) #0 {
 ;
 ; CHECK-NOLSE-O0-LABEL: val_compare_and_swap:
 ; CHECK-NOLSE-O0:       ; %bb.0:
-; CHECK-NOLSE-O0-NEXT:    mov x9, x0
+; CHECK-NOLSE-O0-NEXT:    mov x8, x0
 ; CHECK-NOLSE-O0-NEXT:  LBB0_1: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O0-NEXT:    ldaxr w0, [x9]
+; CHECK-NOLSE-O0-NEXT:    ldaxr w0, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp w0, w1
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB0_3
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    stlxr w8, w2, [x9]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB0_1
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, w2, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB0_1
 ; CHECK-NOLSE-O0-NEXT:  LBB0_3:
 ; CHECK-NOLSE-O0-NEXT:    ret
 ;
@@ -117,15 +117,15 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) #0 {
 ;
 ; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_from_load:
 ; CHECK-NOLSE-O0:       ; %bb.0:
-; CHECK-NOLSE-O0-NEXT:    mov x9, x0
-; CHECK-NOLSE-O0-NEXT:    ldr w10, [x2]
+; CHECK-NOLSE-O0-NEXT:    mov x8, x0
+; CHECK-NOLSE-O0-NEXT:    ldr w9, [x2]
 ; CHECK-NOLSE-O0-NEXT:  LBB1_1: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O0-NEXT:    ldaxr w0, [x9]
+; CHECK-NOLSE-O0-NEXT:    ldaxr w0, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp w0, w1
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB1_3
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB1_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    stlxr w8, w10, [x9]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB1_1
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, w9, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB1_1
 ; CHECK-NOLSE-O0-NEXT:  LBB1_3:
 ; CHECK-NOLSE-O0-NEXT:    ret
 ;
@@ -196,14 +196,14 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) #0 {
 ;
 ; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_rel:
 ; CHECK-NOLSE-O0:       ; %bb.0:
-; CHECK-NOLSE-O0-NEXT:    mov x9, x0
+; CHECK-NOLSE-O0-NEXT:    mov x8, x0
 ; CHECK-NOLSE-O0-NEXT:  LBB2_1: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O0-NEXT:    ldaxr w0, [x9]
+; CHECK-NOLSE-O0-NEXT:    ldaxr w0, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp w0, w1
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB2_3
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB2_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    stlxr w8, w2, [x9]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB2_1
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, w2, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB2_1
 ; CHECK-NOLSE-O0-NEXT:  LBB2_3:
 ; CHECK-NOLSE-O0-NEXT:    ret
 ;
@@ -270,14 +270,14 @@ define i64 @val_compare_and_swap_64(ptr %p, i64 %cmp, i64 %new) #0 {
 ;
 ; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_64:
 ; CHECK-NOLSE-O0:       ; %bb.0:
-; CHECK-NOLSE-O0-NEXT:    mov x9, x0
+; CHECK-NOLSE-O0-NEXT:    mov x8, x0
 ; CHECK-NOLSE-O0-NEXT:  LBB3_1: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O0-NEXT:    ldaxr x0, [x9]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x0, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp x0, x1
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB3_3
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB3_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    stlxr w8, x2, [x9]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB3_1
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x2, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB3_1
 ; CHECK-NOLSE-O0-NEXT:  LBB3_3:
 ; CHECK-NOLSE-O0-NEXT:    ret
 ;
@@ -344,14 +344,14 @@ define i64 @val_compare_and_swap_64_monotonic_seqcst(ptr %p, i64 %cmp, i64 %new)
 ;
 ; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_64_monotonic_seqcst:
 ; CHECK-NOLSE-O0:       ; %bb.0:
-; CHECK-NOLSE-O0-NEXT:    mov x9, x0
+; CHECK-NOLSE-O0-NEXT:    mov x8, x0
 ; CHECK-NOLSE-O0-NEXT:  LBB4_1: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O0-NEXT:    ldaxr x0, [x9]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x0, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp x0, x1
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB4_3
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB4_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    stlxr w8, x2, [x9]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB4_1
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x2, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB4_1
 ; CHECK-NOLSE-O0-NEXT:  LBB4_3:
 ; CHECK-NOLSE-O0-NEXT:    ret
 ;
@@ -418,14 +418,14 @@ define i64 @val_compare_and_swap_64_release_acquire(ptr %p, i64 %cmp, i64 %new)
 ;
 ; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_64_release_acquire:
 ; CHECK-NOLSE-O0:       ; %bb.0:
-; CHECK-NOLSE-O0-NEXT:    mov x9, x0
+; CHECK-NOLSE-O0-NEXT:    mov x8, x0
 ; CHECK-NOLSE-O0-NEXT:  LBB5_1: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O0-NEXT:    ldaxr x0, [x9]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x0, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp x0, x1
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB5_3
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB5_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    stlxr w8, x2, [x9]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB5_1
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x2, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB5_1
 ; CHECK-NOLSE-O0-NEXT:  LBB5_3:
 ; CHECK-NOLSE-O0-NEXT:    ret
 ;
@@ -497,22 +497,22 @@ define i32 @fetch_and_nand(ptr %p) #0 {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB6_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    and w9, w8, #0x7
-; CHECK-NOLSE-O0-NEXT:    mvn w12, w9
+; CHECK-NOLSE-O0-NEXT:    mvn w11, w9
 ; CHECK-NOLSE-O0-NEXT:  LBB6_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB6_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB6_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB6_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB6_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB6_2
 ; CHECK-NOLSE-O0-NEXT:  LBB6_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB6_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
@@ -540,7 +540,7 @@ define i32 @fetch_and_nand(ptr %p) #0 {
 ; CHECK-OUTLINE-O0-NEXT:    mvn w1, w8
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas4_rel
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
-; CHECK-OUTLINE-O0-NEXT:    subs w8, w0, w8
+; CHECK-OUTLINE-O0-NEXT:    cmp w0, w8
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
@@ -582,7 +582,7 @@ define i32 @fetch_and_nand(ptr %p) #0 {
 ; CHECK-LSE-O0-NEXT:    mvn w10, w9
 ; CHECK-LSE-O0-NEXT:    mov x9, x8
 ; CHECK-LSE-O0-NEXT:    casl w9, w10, [x11]
-; CHECK-LSE-O0-NEXT:    subs w8, w9, w8
+; CHECK-LSE-O0-NEXT:    cmp w9, w8
 ; CHECK-LSE-O0-NEXT:    cset w8, eq
 ; CHECK-LSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-LSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
@@ -634,22 +634,22 @@ define i64 @fetch_and_nand_64(ptr %p) #0 {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB7_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    and x9, x8, #0x7
-; CHECK-NOLSE-O0-NEXT:    mvn x12, x9
+; CHECK-NOLSE-O0-NEXT:    mvn x11, x9
 ; CHECK-NOLSE-O0-NEXT:  LBB7_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB7_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB7_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB7_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB7_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB7_2
 ; CHECK-NOLSE-O0-NEXT:  LBB7_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB7_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #8] ; 8-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
@@ -677,7 +677,7 @@ define i64 @fetch_and_nand_64(ptr %p) #0 {
 ; CHECK-OUTLINE-O0-NEXT:    mvn x1, x8
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_acq_rel
 ; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp] ; 8-byte Folded Reload
-; CHECK-OUTLINE-O0-NEXT:    subs x8, x0, x8
+; CHECK-OUTLINE-O0-NEXT:    cmp x0, x8
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
@@ -719,7 +719,7 @@ define i64 @fetch_and_nand_64(ptr %p) #0 {
 ; CHECK-LSE-O0-NEXT:    mvn x10, x9
 ; CHECK-LSE-O0-NEXT:    mov x9, x8
 ; CHECK-LSE-O0-NEXT:    casal x9, x10, [x11]
-; CHECK-LSE-O0-NEXT:    subs x8, x9, x8
+; CHECK-LSE-O0-NEXT:    cmp x9, x8
 ; CHECK-LSE-O0-NEXT:    cset w8, eq
 ; CHECK-LSE-O0-NEXT:    str x9, [sp, #8] ; 8-byte Folded Spill
 ; CHECK-LSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
@@ -767,22 +767,22 @@ define i32 @fetch_and_or(ptr %p) #0 {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB8_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    mov w9, #5 ; =0x5
-; CHECK-NOLSE-O0-NEXT:    orr w12, w8, w9
+; CHECK-NOLSE-O0-NEXT:    orr w11, w8, w9
 ; CHECK-NOLSE-O0-NEXT:  LBB8_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB8_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB8_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB8_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB8_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB8_2
 ; CHECK-NOLSE-O0-NEXT:  LBB8_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB8_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
@@ -841,21 +841,21 @@ define i64 @fetch_and_or_64(ptr %p) #0 {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB9_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    orr x12, x8, #0x7
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    orr x11, x8, #0x7
 ; CHECK-NOLSE-O0-NEXT:  LBB9_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB9_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB9_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB9_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB9_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB9_2
 ; CHECK-NOLSE-O0-NEXT:  LBB9_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB9_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #8] ; 8-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
@@ -1880,24 +1880,24 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB27_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    add w12, w8, w10, uxth
+; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    add w11, w9, w10, uxth
 ; CHECK-NOLSE-O0-NEXT:  LBB27_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB27_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB27_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB27_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB27_2
+; CHECK-NOLSE-O0-NEXT:    stlxrb wzr, w11, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB27_2
 ; CHECK-NOLSE-O0-NEXT:  LBB27_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB27_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB27_1
@@ -1973,23 +1973,23 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB28_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr w12, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr w11, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:  LBB28_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB28_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB28_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB28_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB28_2
+; CHECK-NOLSE-O0-NEXT:    stlxrb wzr, w11, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB28_2
 ; CHECK-NOLSE-O0-NEXT:  LBB28_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB28_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB28_1
@@ -2066,24 +2066,24 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB29_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    subs w12, w10, w8
+; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    subs w11, w10, w9
 ; CHECK-NOLSE-O0-NEXT:  LBB29_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB29_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB29_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB29_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB29_2
+; CHECK-NOLSE-O0-NEXT:    stlxrb wzr, w11, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB29_2
 ; CHECK-NOLSE-O0-NEXT:  LBB29_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB29_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB29_1
@@ -2164,24 +2164,24 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB30_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    and w12, w10, w8
+; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    and w11, w10, w9
 ; CHECK-NOLSE-O0-NEXT:  LBB30_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB30_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB30_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB30_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB30_2
+; CHECK-NOLSE-O0-NEXT:    stlxrb wzr, w11, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB30_2
 ; CHECK-NOLSE-O0-NEXT:  LBB30_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB30_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB30_1
@@ -2262,24 +2262,24 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB31_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    orr w12, w10, w8
+; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    orr w11, w10, w9
 ; CHECK-NOLSE-O0-NEXT:  LBB31_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB31_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB31_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB31_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB31_2
+; CHECK-NOLSE-O0-NEXT:    stlxrb wzr, w11, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB31_2
 ; CHECK-NOLSE-O0-NEXT:  LBB31_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB31_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB31_1
@@ -2356,24 +2356,24 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB32_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    eor w12, w10, w8
+; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    eor w11, w10, w9
 ; CHECK-NOLSE-O0-NEXT:  LBB32_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB32_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB32_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB32_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB32_2
+; CHECK-NOLSE-O0-NEXT:    stlxrb wzr, w11, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB32_2
 ; CHECK-NOLSE-O0-NEXT:  LBB32_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB32_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB32_1
@@ -2453,26 +2453,26 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB33_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    sxtb w9, w10
-; CHECK-NOLSE-O0-NEXT:    subs w9, w9, w8, sxtb
-; CHECK-NOLSE-O0-NEXT:    csel w12, w10, w8, le
+; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    sxtb w11, w10
+; CHECK-NOLSE-O0-NEXT:    cmp w11, w9, sxtb
+; CHECK-NOLSE-O0-NEXT:    csel w11, w10, w9, le
 ; CHECK-NOLSE-O0-NEXT:  LBB33_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB33_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB33_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB33_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB33_2
+; CHECK-NOLSE-O0-NEXT:    stlxrb wzr, w11, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB33_2
 ; CHECK-NOLSE-O0-NEXT:  LBB33_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB33_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB33_1
@@ -2501,13 +2501,13 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    sxtb w9, w0
-; CHECK-OUTLINE-O0-NEXT:    subs w9, w9, w8, sxtb
+; CHECK-OUTLINE-O0-NEXT:    cmp w9, w8, sxtb
 ; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, le
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas1_acq
 ; CHECK-OUTLINE-O0-NEXT:    ldr w9, [sp, #8] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    and w8, w0, #0xff
-; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w9, uxtb
+; CHECK-OUTLINE-O0-NEXT:    cmp w8, w9, uxtb
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB33_1
@@ -2573,26 +2573,26 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB34_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    sxtb w9, w10
-; CHECK-NOLSE-O0-NEXT:    subs w9, w9, w8, sxtb
-; CHECK-NOLSE-O0-NEXT:    csel w12, w10, w8, gt
+; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    sxtb w11, w10
+; CHECK-NOLSE-O0-NEXT:    cmp w11, w9, sxtb
+; CHECK-NOLSE-O0-NEXT:    csel w11, w10, w9, gt
 ; CHECK-NOLSE-O0-NEXT:  LBB34_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB34_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB34_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB34_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB34_2
+; CHECK-NOLSE-O0-NEXT:    stlxrb wzr, w11, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB34_2
 ; CHECK-NOLSE-O0-NEXT:  LBB34_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB34_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB34_1
@@ -2621,13 +2621,13 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    sxtb w9, w0
-; CHECK-OUTLINE-O0-NEXT:    subs w9, w9, w8, sxtb
+; CHECK-OUTLINE-O0-NEXT:    cmp w9, w8, sxtb
 ; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, gt
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas1_rel
 ; CHECK-OUTLINE-O0-NEXT:    ldr w9, [sp, #8] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    and w8, w0, #0xff
-; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w9, uxtb
+; CHECK-OUTLINE-O0-NEXT:    cmp w8, w9, uxtb
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB34_1
@@ -2695,26 +2695,26 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB35_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    and w9, w10, #0xff
-; CHECK-NOLSE-O0-NEXT:    subs w9, w9, w8, uxtb
-; CHECK-NOLSE-O0-NEXT:    csel w12, w10, w8, ls
+; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    and w11, w10, #0xff
+; CHECK-NOLSE-O0-NEXT:    cmp w11, w9, uxtb
+; CHECK-NOLSE-O0-NEXT:    csel w11, w10, w9, ls
 ; CHECK-NOLSE-O0-NEXT:  LBB35_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB35_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB35_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB35_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB35_2
+; CHECK-NOLSE-O0-NEXT:    stlxrb wzr, w11, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB35_2
 ; CHECK-NOLSE-O0-NEXT:  LBB35_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB35_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB35_1
@@ -2743,13 +2743,13 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    and w9, w0, #0xff
-; CHECK-OUTLINE-O0-NEXT:    subs w9, w9, w8, uxtb
+; CHECK-OUTLINE-O0-NEXT:    cmp w9, w8, uxtb
 ; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, ls
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas1_acq_rel
 ; CHECK-OUTLINE-O0-NEXT:    ldr w9, [sp, #8] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    and w8, w0, #0xff
-; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w9, uxtb
+; CHECK-OUTLINE-O0-NEXT:    cmp w8, w9, uxtb
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB35_1
@@ -2817,26 +2817,26 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB36_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    and w9, w10, #0xff
-; CHECK-NOLSE-O0-NEXT:    subs w9, w9, w8, uxtb
-; CHECK-NOLSE-O0-NEXT:    csel w12, w10, w8, hi
+; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    and w11, w10, #0xff
+; CHECK-NOLSE-O0-NEXT:    cmp w11, w9, uxtb
+; CHECK-NOLSE-O0-NEXT:    csel w11, w10, w9, hi
 ; CHECK-NOLSE-O0-NEXT:  LBB36_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB36_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB36_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB36_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB36_2
+; CHECK-NOLSE-O0-NEXT:    stlxrb wzr, w11, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB36_2
 ; CHECK-NOLSE-O0-NEXT:  LBB36_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB36_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w10, uxtb
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB36_1
@@ -2865,13 +2865,13 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    and w9, w0, #0xff
-; CHECK-OUTLINE-O0-NEXT:    subs w9, w9, w8, uxtb
+; CHECK-OUTLINE-O0-NEXT:    cmp w9, w8, uxtb
 ; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, hi
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas1_relax
 ; CHECK-OUTLINE-O0-NEXT:    ldr w9, [sp, #8] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    and w8, w0, #0xff
-; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w9, uxtb
+; CHECK-OUTLINE-O0-NEXT:    cmp w8, w9, uxtb
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB36_1
@@ -2934,24 +2934,24 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB37_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    add w12, w9, w8, uxth
+; CHECK-NOLSE-O0-NEXT:    add w11, w9, w8, uxth
 ; CHECK-NOLSE-O0-NEXT:  LBB37_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB37_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB37_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB37_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB37_2
+; CHECK-NOLSE-O0-NEXT:    stlxrh wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB37_2
 ; CHECK-NOLSE-O0-NEXT:  LBB37_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB37_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w9, uxth
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB37_1
@@ -3027,23 +3027,23 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB38_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr w12, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr w11, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:  LBB38_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB38_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB38_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB38_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB38_2
+; CHECK-NOLSE-O0-NEXT:    stlxrh wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB38_2
 ; CHECK-NOLSE-O0-NEXT:  LBB38_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB38_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w9, uxth
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB38_1
@@ -3120,24 +3120,24 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB39_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    subs w12, w8, w9
+; CHECK-NOLSE-O0-NEXT:    subs w11, w8, w9
 ; CHECK-NOLSE-O0-NEXT:  LBB39_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB39_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB39_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB39_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB39_2
+; CHECK-NOLSE-O0-NEXT:    stlxrh wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB39_2
 ; CHECK-NOLSE-O0-NEXT:  LBB39_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB39_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w9, uxth
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB39_1
@@ -3218,24 +3218,24 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB40_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    and w12, w8, w9
+; CHECK-NOLSE-O0-NEXT:    and w11, w8, w9
 ; CHECK-NOLSE-O0-NEXT:  LBB40_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB40_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB40_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB40_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB40_2
+; CHECK-NOLSE-O0-NEXT:    stlxrh wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB40_2
 ; CHECK-NOLSE-O0-NEXT:  LBB40_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB40_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w9, uxth
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB40_1
@@ -3316,24 +3316,24 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB41_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    orr w12, w8, w9
+; CHECK-NOLSE-O0-NEXT:    orr w11, w8, w9
 ; CHECK-NOLSE-O0-NEXT:  LBB41_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB41_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB41_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB41_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB41_2
+; CHECK-NOLSE-O0-NEXT:    stlxrh wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB41_2
 ; CHECK-NOLSE-O0-NEXT:  LBB41_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB41_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w9, uxth
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB41_1
@@ -3410,24 +3410,24 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB42_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    eor w12, w8, w9
+; CHECK-NOLSE-O0-NEXT:    eor w11, w8, w9
 ; CHECK-NOLSE-O0-NEXT:  LBB42_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB42_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB42_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB42_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB42_2
+; CHECK-NOLSE-O0-NEXT:    stlxrh wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB42_2
 ; CHECK-NOLSE-O0-NEXT:  LBB42_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB42_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w9, uxth
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB42_1
@@ -3507,26 +3507,26 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB43_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    sxth w10, w8
-; CHECK-NOLSE-O0-NEXT:    subs w10, w10, w9, sxth
-; CHECK-NOLSE-O0-NEXT:    csel w12, w8, w9, le
+; CHECK-NOLSE-O0-NEXT:    sxth w11, w8
+; CHECK-NOLSE-O0-NEXT:    cmp w11, w9, sxth
+; CHECK-NOLSE-O0-NEXT:    csel w11, w8, w9, le
 ; CHECK-NOLSE-O0-NEXT:  LBB43_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB43_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB43_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB43_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB43_2
+; CHECK-NOLSE-O0-NEXT:    stlxrh wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB43_2
 ; CHECK-NOLSE-O0-NEXT:  LBB43_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB43_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w9, uxth
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB43_1
@@ -3555,13 +3555,13 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    sxth w9, w0
-; CHECK-OUTLINE-O0-NEXT:    subs w9, w9, w8, sxth
+; CHECK-OUTLINE-O0-NEXT:    cmp w9, w8, sxth
 ; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, le
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas2_acq
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    uxth w8, w8
-; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w0, uxth
+; CHECK-OUTLINE-O0-NEXT:    cmp w8, w0, uxth
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB43_1
@@ -3627,26 +3627,26 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB44_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    sxth w10, w8
-; CHECK-NOLSE-O0-NEXT:    subs w10, w10, w9, sxth
-; CHECK-NOLSE-O0-NEXT:    csel w12, w8, w9, gt
+; CHECK-NOLSE-O0-NEXT:    sxth w11, w8
+; CHECK-NOLSE-O0-NEXT:    cmp w11, w9, sxth
+; CHECK-NOLSE-O0-NEXT:    csel w11, w8, w9, gt
 ; CHECK-NOLSE-O0-NEXT:  LBB44_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB44_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB44_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB44_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB44_2
+; CHECK-NOLSE-O0-NEXT:    stlxrh wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB44_2
 ; CHECK-NOLSE-O0-NEXT:  LBB44_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB44_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w9, uxth
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB44_1
@@ -3675,13 +3675,13 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    sxth w9, w0
-; CHECK-OUTLINE-O0-NEXT:    subs w9, w9, w8, sxth
+; CHECK-OUTLINE-O0-NEXT:    cmp w9, w8, sxth
 ; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, gt
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas2_rel
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    uxth w8, w8
-; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w0, uxth
+; CHECK-OUTLINE-O0-NEXT:    cmp w8, w0, uxth
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB44_1
@@ -3749,26 +3749,26 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB45_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    uxth w10, w8
-; CHECK-NOLSE-O0-NEXT:    subs w10, w10, w9, uxth
-; CHECK-NOLSE-O0-NEXT:    csel w12, w8, w9, ls
+; CHECK-NOLSE-O0-NEXT:    uxth w11, w8
+; CHECK-NOLSE-O0-NEXT:    cmp w11, w9, uxth
+; CHECK-NOLSE-O0-NEXT:    csel w11, w8, w9, ls
 ; CHECK-NOLSE-O0-NEXT:  LBB45_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB45_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB45_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB45_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB45_2
+; CHECK-NOLSE-O0-NEXT:    stlxrh wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB45_2
 ; CHECK-NOLSE-O0-NEXT:  LBB45_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB45_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w9, uxth
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB45_1
@@ -3797,13 +3797,13 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    uxth w9, w0
-; CHECK-OUTLINE-O0-NEXT:    subs w9, w9, w8, uxth
+; CHECK-OUTLINE-O0-NEXT:    cmp w9, w8, uxth
 ; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, ls
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas2_acq_rel
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    uxth w8, w8
-; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w0, uxth
+; CHECK-OUTLINE-O0-NEXT:    cmp w8, w0, uxth
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB45_1
@@ -3871,26 +3871,26 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB46_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    uxth w10, w8
-; CHECK-NOLSE-O0-NEXT:    subs w10, w10, w9, uxth
-; CHECK-NOLSE-O0-NEXT:    csel w12, w8, w9, hi
+; CHECK-NOLSE-O0-NEXT:    uxth w11, w8
+; CHECK-NOLSE-O0-NEXT:    cmp w11, w9, uxth
+; CHECK-NOLSE-O0-NEXT:    csel w11, w8, w9, hi
 ; CHECK-NOLSE-O0-NEXT:  LBB46_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB46_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB46_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB46_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB46_2
+; CHECK-NOLSE-O0-NEXT:    stlxrh wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB46_2
 ; CHECK-NOLSE-O0-NEXT:  LBB46_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB46_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w9, uxth
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB46_1
@@ -3919,13 +3919,13 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    uxth w9, w0
-; CHECK-OUTLINE-O0-NEXT:    subs w9, w9, w8, uxth
+; CHECK-OUTLINE-O0-NEXT:    cmp w9, w8, uxth
 ; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, hi
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas2_relax
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    uxth w8, w8
-; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w0, uxth
+; CHECK-OUTLINE-O0-NEXT:    cmp w8, w0, uxth
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB46_1
@@ -3988,22 +3988,22 @@ define i32 @atomicrmw_add_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB47_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    add w12, w8, w9
+; CHECK-NOLSE-O0-NEXT:    add w11, w8, w9
 ; CHECK-NOLSE-O0-NEXT:  LBB47_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB47_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB47_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB47_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB47_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB47_2
 ; CHECK-NOLSE-O0-NEXT:  LBB47_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB47_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
@@ -4081,21 +4081,21 @@ define i32 @atomicrmw_xchg_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB48_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr w12, [sp, #24] ; 4-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr w11, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:  LBB48_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB48_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB48_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB48_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB48_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB48_2
 ; CHECK-NOLSE-O0-NEXT:  LBB48_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB48_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
@@ -4173,22 +4173,22 @@ define i32 @atomicrmw_sub_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB49_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    subs w12, w8, w9
+; CHECK-NOLSE-O0-NEXT:    subs w11, w8, w9
 ; CHECK-NOLSE-O0-NEXT:  LBB49_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB49_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB49_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB49_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB49_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB49_2
 ; CHECK-NOLSE-O0-NEXT:  LBB49_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB49_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
@@ -4270,22 +4270,22 @@ define i32 @atomicrmw_and_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB50_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    and w12, w8, w9
+; CHECK-NOLSE-O0-NEXT:    and w11, w8, w9
 ; CHECK-NOLSE-O0-NEXT:  LBB50_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB50_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB50_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB50_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB50_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB50_2
 ; CHECK-NOLSE-O0-NEXT:  LBB50_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB50_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
@@ -4367,22 +4367,22 @@ define i32 @atomicrmw_or_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB51_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    orr w12, w8, w9
+; CHECK-NOLSE-O0-NEXT:    orr w11, w8, w9
 ; CHECK-NOLSE-O0-NEXT:  LBB51_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB51_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB51_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB51_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB51_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB51_2
 ; CHECK-NOLSE-O0-NEXT:  LBB51_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB51_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
@@ -4460,22 +4460,22 @@ define i32 @atomicrmw_xor_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB52_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    eor w12, w8, w9
+; CHECK-NOLSE-O0-NEXT:    eor w11, w8, w9
 ; CHECK-NOLSE-O0-NEXT:  LBB52_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB52_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB52_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB52_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB52_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB52_2
 ; CHECK-NOLSE-O0-NEXT:  LBB52_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB52_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
@@ -4554,23 +4554,23 @@ define i32 @atomicrmw_min_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB53_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    subs w10, w8, w9
-; CHECK-NOLSE-O0-NEXT:    csel w12, w8, w9, le
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w9
+; CHECK-NOLSE-O0-NEXT:    csel w11, w8, w9, le
 ; CHECK-NOLSE-O0-NEXT:  LBB53_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB53_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB53_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB53_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB53_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB53_2
 ; CHECK-NOLSE-O0-NEXT:  LBB53_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB53_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
@@ -4599,11 +4599,11 @@ define i32 @atomicrmw_min_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
-; CHECK-OUTLINE-O0-NEXT:    subs w9, w0, w8
+; CHECK-OUTLINE-O0-NEXT:    cmp w0, w8
 ; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, le
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas4_acq
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
-; CHECK-OUTLINE-O0-NEXT:    subs w8, w0, w8
+; CHECK-OUTLINE-O0-NEXT:    cmp w0, w8
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
@@ -4668,23 +4668,23 @@ define i32 @atomicrmw_max_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB54_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    subs w10, w8, w9
-; CHECK-NOLSE-O0-NEXT:    csel w12, w8, w9, gt
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w9
+; CHECK-NOLSE-O0-NEXT:    csel w11, w8, w9, gt
 ; CHECK-NOLSE-O0-NEXT:  LBB54_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB54_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB54_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB54_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB54_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB54_2
 ; CHECK-NOLSE-O0-NEXT:  LBB54_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB54_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
@@ -4713,11 +4713,11 @@ define i32 @atomicrmw_max_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
-; CHECK-OUTLINE-O0-NEXT:    subs w9, w0, w8
+; CHECK-OUTLINE-O0-NEXT:    cmp w0, w8
 ; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, gt
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas4_rel
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
-; CHECK-OUTLINE-O0-NEXT:    subs w8, w0, w8
+; CHECK-OUTLINE-O0-NEXT:    cmp w0, w8
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
@@ -4782,23 +4782,23 @@ define i32 @atomicrmw_umin_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB55_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    subs w10, w8, w9
-; CHECK-NOLSE-O0-NEXT:    csel w12, w8, w9, ls
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w9
+; CHECK-NOLSE-O0-NEXT:    csel w11, w8, w9, ls
 ; CHECK-NOLSE-O0-NEXT:  LBB55_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB55_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB55_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB55_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB55_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB55_2
 ; CHECK-NOLSE-O0-NEXT:  LBB55_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB55_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
@@ -4827,11 +4827,11 @@ define i32 @atomicrmw_umin_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
-; CHECK-OUTLINE-O0-NEXT:    subs w9, w0, w8
+; CHECK-OUTLINE-O0-NEXT:    cmp w0, w8
 ; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, ls
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas4_acq_rel
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
-; CHECK-OUTLINE-O0-NEXT:    subs w8, w0, w8
+; CHECK-OUTLINE-O0-NEXT:    cmp w0, w8
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
@@ -4896,23 +4896,23 @@ define i32 @atomicrmw_umax_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB56_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    subs w10, w8, w9
-; CHECK-NOLSE-O0-NEXT:    csel w12, w8, w9, hi
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w9
+; CHECK-NOLSE-O0-NEXT:    csel w11, w8, w9, hi
 ; CHECK-NOLSE-O0-NEXT:  LBB56_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB56_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB56_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB56_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB56_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, w11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB56_2
 ; CHECK-NOLSE-O0-NEXT:  LBB56_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB56_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
+; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
@@ -4941,11 +4941,11 @@ define i32 @atomicrmw_umax_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
-; CHECK-OUTLINE-O0-NEXT:    subs w9, w0, w8
+; CHECK-OUTLINE-O0-NEXT:    cmp w0, w8
 ; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, hi
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas4_relax
 ; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
-; CHECK-OUTLINE-O0-NEXT:    subs w8, w0, w8
+; CHECK-OUTLINE-O0-NEXT:    cmp w0, w8
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
@@ -5009,22 +5009,22 @@ define i64 @atomicrmw_add_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB57_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    add x12, x8, x9
+; CHECK-NOLSE-O0-NEXT:    add x11, x8, x9
 ; CHECK-NOLSE-O0-NEXT:  LBB57_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB57_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB57_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB57_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB57_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB57_2
 ; CHECK-NOLSE-O0-NEXT:  LBB57_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB57_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
@@ -5101,21 +5101,21 @@ define i64 @atomicrmw_xchg_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB58_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x12, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:  LBB58_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB58_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB58_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB58_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB58_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB58_2
 ; CHECK-NOLSE-O0-NEXT:  LBB58_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB58_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
@@ -5193,22 +5193,22 @@ define i64 @atomicrmw_sub_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB59_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    subs x12, x8, x9
+; CHECK-NOLSE-O0-NEXT:    subs x11, x8, x9
 ; CHECK-NOLSE-O0-NEXT:  LBB59_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB59_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB59_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB59_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB59_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB59_2
 ; CHECK-NOLSE-O0-NEXT:  LBB59_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB59_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
@@ -5290,22 +5290,22 @@ define i64 @atomicrmw_and_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB60_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    and x12, x8, x9
+; CHECK-NOLSE-O0-NEXT:    and x11, x8, x9
 ; CHECK-NOLSE-O0-NEXT:  LBB60_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB60_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB60_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB60_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB60_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB60_2
 ; CHECK-NOLSE-O0-NEXT:  LBB60_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB60_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
@@ -5387,22 +5387,22 @@ define i64 @atomicrmw_or_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB61_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    orr x12, x8, x9
+; CHECK-NOLSE-O0-NEXT:    orr x11, x8, x9
 ; CHECK-NOLSE-O0-NEXT:  LBB61_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB61_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB61_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB61_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB61_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB61_2
 ; CHECK-NOLSE-O0-NEXT:  LBB61_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB61_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
@@ -5480,22 +5480,22 @@ define i64 @atomicrmw_xor_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB62_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    eor x12, x8, x9
+; CHECK-NOLSE-O0-NEXT:    eor x11, x8, x9
 ; CHECK-NOLSE-O0-NEXT:  LBB62_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB62_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB62_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB62_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB62_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB62_2
 ; CHECK-NOLSE-O0-NEXT:  LBB62_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB62_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
@@ -5574,23 +5574,23 @@ define i64 @atomicrmw_min_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB63_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    subs x10, x8, x9
-; CHECK-NOLSE-O0-NEXT:    csel x12, x8, x9, le
+; CHECK-NOLSE-O0-NEXT:    cmp x8, x9
+; CHECK-NOLSE-O0-NEXT:    csel x11, x8, x9, le
 ; CHECK-NOLSE-O0-NEXT:  LBB63_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB63_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB63_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB63_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB63_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB63_2
 ; CHECK-NOLSE-O0-NEXT:  LBB63_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB63_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
@@ -5619,11 +5619,11 @@ define i64 @atomicrmw_min_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #24] ; 8-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp, #32] ; 8-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
-; CHECK-OUTLINE-O0-NEXT:    subs x9, x0, x8
+; CHECK-OUTLINE-O0-NEXT:    cmp x0, x8
 ; CHECK-OUTLINE-O0-NEXT:    csel x1, x0, x8, le
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_acq
 ; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp, #8] ; 8-byte Folded Reload
-; CHECK-OUTLINE-O0-NEXT:    subs x8, x0, x8
+; CHECK-OUTLINE-O0-NEXT:    cmp x0, x8
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #40] ; 8-byte Folded Spill
@@ -5688,23 +5688,23 @@ define i64 @atomicrmw_max_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB64_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    subs x10, x8, x9
-; CHECK-NOLSE-O0-NEXT:    csel x12, x8, x9, gt
+; CHECK-NOLSE-O0-NEXT:    cmp x8, x9
+; CHECK-NOLSE-O0-NEXT:    csel x11, x8, x9, gt
 ; CHECK-NOLSE-O0-NEXT:  LBB64_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB64_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB64_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB64_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB64_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB64_2
 ; CHECK-NOLSE-O0-NEXT:  LBB64_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB64_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
@@ -5733,11 +5733,11 @@ define i64 @atomicrmw_max_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #24] ; 8-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp, #32] ; 8-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
-; CHECK-OUTLINE-O0-NEXT:    subs x9, x0, x8
+; CHECK-OUTLINE-O0-NEXT:    cmp x0, x8
 ; CHECK-OUTLINE-O0-NEXT:    csel x1, x0, x8, gt
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_rel
 ; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp, #8] ; 8-byte Folded Reload
-; CHECK-OUTLINE-O0-NEXT:    subs x8, x0, x8
+; CHECK-OUTLINE-O0-NEXT:    cmp x0, x8
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #40] ; 8-byte Folded Spill
@@ -5802,23 +5802,23 @@ define i64 @atomicrmw_umin_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB65_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    subs x10, x8, x9
-; CHECK-NOLSE-O0-NEXT:    csel x12, x8, x9, ls
+; CHECK-NOLSE-O0-NEXT:    cmp x8, x9
+; CHECK-NOLSE-O0-NEXT:    csel x11, x8, x9, ls
 ; CHECK-NOLSE-O0-NEXT:  LBB65_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB65_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB65_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB65_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB65_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB65_2
 ; CHECK-NOLSE-O0-NEXT:  LBB65_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB65_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
@@ -5847,11 +5847,11 @@ define i64 @atomicrmw_umin_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #24] ; 8-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp, #32] ; 8-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
-; CHECK-OUTLINE-O0-NEXT:    subs x9, x0, x8
+; CHECK-OUTLINE-O0-NEXT:    cmp x0, x8
 ; CHECK-OUTLINE-O0-NEXT:    csel x1, x0, x8, ls
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_acq_rel
 ; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp, #8] ; 8-byte Folded Reload
-; CHECK-OUTLINE-O0-NEXT:    subs x8, x0, x8
+; CHECK-OUTLINE-O0-NEXT:    cmp x0, x8
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #40] ; 8-byte Folded Spill
@@ -5916,23 +5916,23 @@ define i64 @atomicrmw_umax_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB66_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NOLSE-O0-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT:    subs x10, x8, x9
-; CHECK-NOLSE-O0-NEXT:    csel x12, x8, x9, hi
+; CHECK-NOLSE-O0-NEXT:    cmp x8, x9
+; CHECK-NOLSE-O0-NEXT:    csel x11, x8, x9, hi
 ; CHECK-NOLSE-O0-NEXT:  LBB66_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB66_1 Depth=1
 ; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
-; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x10]
 ; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB66_4
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB66_2 Depth=2
-; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
-; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB66_2
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x11, [x10]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB66_2
 ; CHECK-NOLSE-O0-NEXT:  LBB66_4: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB66_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
+; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
 ; CHECK-NOLSE-O0-NEXT:    cset w8, eq
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
 ; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
@@ -5961,11 +5961,11 @@ define i64 @atomicrmw_umax_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #24] ; 8-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp, #32] ; 8-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
-; CHECK-OUTLINE-O0-NEXT:    subs x9, x0, x8
+; CHECK-OUTLINE-O0-NEXT:    cmp x0, x8
 ; CHECK-OUTLINE-O0-NEXT:    csel x1, x0, x8, hi
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_relax
 ; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp, #8] ; 8-byte Folded Reload
-; CHECK-OUTLINE-O0-NEXT:    subs x8, x0, x8
+; CHECK-OUTLINE-O0-NEXT:    cmp x0, x8
 ; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
 ; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
 ; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #40] ; 8-byte Folded Spill
@@ -6038,17 +6038,17 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
 ;
 ; CHECK-NOLSE-O0-LABEL: cmpxchg_i8:
 ; CHECK-NOLSE-O0:       ; %bb.0:
-; CHECK-NOLSE-O0-NEXT:    mov x9, x0
+; CHECK-NOLSE-O0-NEXT:    mov x8, x0
 ; CHECK-NOLSE-O0-NEXT:  LBB67_1: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O0-NEXT:    ldaxrb w0, [x9]
+; CHECK-NOLSE-O0-NEXT:    ldaxrb w0, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp w0, w1, uxtb
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB67_3
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB67_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w2, [x9]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB67_1
+; CHECK-NOLSE-O0-NEXT:    stlxrb wzr, w2, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB67_1
 ; CHECK-NOLSE-O0-NEXT:  LBB67_3:
 ; CHECK-NOLSE-O0-NEXT:    and w8, w0, #0xff
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w1, uxtb
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w1, uxtb
 ; CHECK-NOLSE-O0-NEXT:    cset w1, eq
 ; CHECK-NOLSE-O0-NEXT:    ret
 ;
@@ -6067,7 +6067,7 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas1_relax
 ; CHECK-OUTLINE-O0-NEXT:    ldr w1, [sp, #12] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    and w8, w0, #0xff
-; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w1, uxtb
+; CHECK-OUTLINE-O0-NEXT:    cmp w8, w1, uxtb
 ; CHECK-OUTLINE-O0-NEXT:    cset w1, eq
 ; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
@@ -6089,7 +6089,7 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
 ; CHECK-LSE-O0-NEXT:    mov x0, x1
 ; CHECK-LSE-O0-NEXT:    casb w0, w2, [x8]
 ; CHECK-LSE-O0-NEXT:    and w8, w0, #0xff
-; CHECK-LSE-O0-NEXT:    subs w8, w8, w1, uxtb
+; CHECK-LSE-O0-NEXT:    cmp w8, w1, uxtb
 ; CHECK-LSE-O0-NEXT:    cset w1, eq
 ; CHECK-LSE-O0-NEXT:    ret
   %res = cmpxchg ptr %ptr, i8 %desired, i8 %new monotonic monotonic
@@ -6144,17 +6144,17 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
 ;
 ; CHECK-NOLSE-O0-LABEL: cmpxchg_i16:
 ; CHECK-NOLSE-O0:       ; %bb.0:
-; CHECK-NOLSE-O0-NEXT:    mov x9, x0
+; CHECK-NOLSE-O0-NEXT:    mov x8, x0
 ; CHECK-NOLSE-O0-NEXT:  LBB68_1: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O0-NEXT:    ldaxrh w0, [x9]
+; CHECK-NOLSE-O0-NEXT:    ldaxrh w0, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp w0, w1, uxth
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB68_3
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB68_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    stlxrh w8, w2, [x9]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB68_1
+; CHECK-NOLSE-O0-NEXT:    stlxrh wzr, w2, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB68_1
 ; CHECK-NOLSE-O0-NEXT:  LBB68_3:
 ; CHECK-NOLSE-O0-NEXT:    and w8, w0, #0xffff
-; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w1, uxth
+; CHECK-NOLSE-O0-NEXT:    cmp w8, w1, uxth
 ; CHECK-NOLSE-O0-NEXT:    cset w1, eq
 ; CHECK-NOLSE-O0-NEXT:    ret
 ;
@@ -6173,7 +6173,7 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas2_relax
 ; CHECK-OUTLINE-O0-NEXT:    ldr w1, [sp, #12] ; 4-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    and w8, w0, #0xffff
-; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w1, uxth
+; CHECK-OUTLINE-O0-NEXT:    cmp w8, w1, uxth
 ; CHECK-OUTLINE-O0-NEXT:    cset w1, eq
 ; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
@@ -6195,7 +6195,7 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
 ; CHECK-LSE-O0-NEXT:    mov x0, x1
 ; CHECK-LSE-O0-NEXT:    cash w0, w2, [x8]
 ; CHECK-LSE-O0-NEXT:    and w8, w0, #0xffff
-; CHECK-LSE-O0-NEXT:    subs w8, w8, w1, uxth
+; CHECK-LSE-O0-NEXT:    cmp w8, w1, uxth
 ; CHECK-LSE-O0-NEXT:    cset w1, eq
 ; CHECK-LSE-O0-NEXT:    ret
   %res = cmpxchg ptr %ptr, i16 %desired, i16 %new monotonic monotonic
@@ -6248,16 +6248,16 @@ define { i32, i1 } @cmpxchg_i32(ptr %ptr, i32 %desired, i32 %new) {
 ;
 ; CHECK-NOLSE-O0-LABEL: cmpxchg_i32:
 ; CHECK-NOLSE-O0:       ; %bb.0:
-; CHECK-NOLSE-O0-NEXT:    mov x9, x0
+; CHECK-NOLSE-O0-NEXT:    mov x8, x0
 ; CHECK-NOLSE-O0-NEXT:  LBB69_1: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O0-NEXT:    ldaxr w0, [x9]
+; CHECK-NOLSE-O0-NEXT:    ldaxr w0, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp w0, w1
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB69_3
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB69_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    stlxr w8, w2, [x9]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB69_1
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, w2, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB69_1
 ; CHECK-NOLSE-O0-NEXT:  LBB69_3:
-; CHECK-NOLSE-O0-NEXT:    subs w8, w0, w1
+; CHECK-NOLSE-O0-NEXT:    cmp w0, w1
 ; CHECK-NOLSE-O0-NEXT:    cset w1, eq
 ; CHECK-NOLSE-O0-NEXT:    ret
 ;
@@ -6275,7 +6275,7 @@ define { i32, i1 } @cmpxchg_i32(ptr %ptr, i32 %desired, i32 %new) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp] ; 8-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas4_relax
 ; CHECK-OUTLINE-O0-NEXT:    ldr w1, [sp, #12] ; 4-byte Folded Reload
-; CHECK-OUTLINE-O0-NEXT:    subs w8, w0, w1
+; CHECK-OUTLINE-O0-NEXT:    cmp w0, w1
 ; CHECK-OUTLINE-O0-NEXT:    cset w1, eq
 ; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
@@ -6295,7 +6295,7 @@ define { i32, i1 } @cmpxchg_i32(ptr %ptr, i32 %desired, i32 %new) {
 ; CHECK-LSE-O0-NEXT:    mov x8, x0
 ; CHECK-LSE-O0-NEXT:    mov x0, x1
 ; CHECK-LSE-O0-NEXT:    cas w0, w2, [x8]
-; CHECK-LSE-O0-NEXT:    subs w8, w0, w1
+; CHECK-LSE-O0-NEXT:    cmp w0, w1
 ; CHECK-LSE-O0-NEXT:    cset w1, eq
 ; CHECK-LSE-O0-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %desired, i32 %new monotonic monotonic
@@ -6346,16 +6346,16 @@ define { i64, i1 } @cmpxchg_i64(ptr %ptr, i64 %desired, i64 %new) {
 ;
 ; CHECK-NOLSE-O0-LABEL: cmpxchg_i64:
 ; CHECK-NOLSE-O0:       ; %bb.0:
-; CHECK-NOLSE-O0-NEXT:    mov x9, x0
+; CHECK-NOLSE-O0-NEXT:    mov x8, x0
 ; CHECK-NOLSE-O0-NEXT:  LBB70_1: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O0-NEXT:    ldaxr x0, [x9]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x0, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp x0, x1
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB70_3
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB70_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    stlxr w8, x2, [x9]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB70_1
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x2, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB70_1
 ; CHECK-NOLSE-O0-NEXT:  LBB70_3:
-; CHECK-NOLSE-O0-NEXT:    subs x8, x0, x1
+; CHECK-NOLSE-O0-NEXT:    cmp x0, x1
 ; CHECK-NOLSE-O0-NEXT:    cset w1, eq
 ; CHECK-NOLSE-O0-NEXT:    ret
 ;
@@ -6373,7 +6373,7 @@ define { i64, i1 } @cmpxchg_i64(ptr %ptr, i64 %desired, i64 %new) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp] ; 8-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_relax
 ; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
-; CHECK-OUTLINE-O0-NEXT:    subs x8, x0, x1
+; CHECK-OUTLINE-O0-NEXT:    cmp x0, x1
 ; CHECK-OUTLINE-O0-NEXT:    cset w1, eq
 ; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
@@ -6393,7 +6393,7 @@ define { i64, i1 } @cmpxchg_i64(ptr %ptr, i64 %desired, i64 %new) {
 ; CHECK-LSE-O0-NEXT:    mov x8, x0
 ; CHECK-LSE-O0-NEXT:    mov x0, x1
 ; CHECK-LSE-O0-NEXT:    cas x0, x2, [x8]
-; CHECK-LSE-O0-NEXT:    subs x8, x0, x1
+; CHECK-LSE-O0-NEXT:    cmp x0, x1
 ; CHECK-LSE-O0-NEXT:    cset w1, eq
 ; CHECK-LSE-O0-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %desired, i64 %new monotonic monotonic
@@ -6444,16 +6444,16 @@ define { ptr, i1 } @cmpxchg_ptr(ptr %ptr, ptr %desired, ptr %new) {
 ;
 ; CHECK-NOLSE-O0-LABEL: cmpxchg_ptr:
 ; CHECK-NOLSE-O0:       ; %bb.0:
-; CHECK-NOLSE-O0-NEXT:    mov x9, x0
+; CHECK-NOLSE-O0-NEXT:    mov x8, x0
 ; CHECK-NOLSE-O0-NEXT:  LBB71_1: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O0-NEXT:    ldaxr x0, [x9]
+; CHECK-NOLSE-O0-NEXT:    ldaxr x0, [x8]
 ; CHECK-NOLSE-O0-NEXT:    cmp x0, x1
 ; CHECK-NOLSE-O0-NEXT:    b.ne LBB71_3
 ; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB71_1 Depth=1
-; CHECK-NOLSE-O0-NEXT:    stlxr w8, x2, [x9]
-; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB71_1
+; CHECK-NOLSE-O0-NEXT:    stlxr wzr, x2, [x8]
+; CHECK-NOLSE-O0-NEXT:    cbnz wzr, LBB71_1
 ; CHECK-NOLSE-O0-NEXT:  LBB71_3:
-; CHECK-NOLSE-O0-NEXT:    subs x8, x0, x1
+; CHECK-NOLSE-O0-NEXT:    cmp x0, x1
 ; CHECK-NOLSE-O0-NEXT:    cset w1, eq
 ; CHECK-NOLSE-O0-NEXT:    ret
 ;
@@ -6471,7 +6471,7 @@ define { ptr, i1 } @cmpxchg_ptr(ptr %ptr, ptr %desired, ptr %new) {
 ; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp] ; 8-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_relax
 ; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
-; CHECK-OUTLINE-O0-NEXT:    subs x8, x0, x1
+; CHECK-OUTLINE-O0-NEXT:    cmp x0, x1
 ; CHECK-OUTLINE-O0-NEXT:    cset w1, eq
 ; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
@@ -6491,7 +6491,7 @@ define { ptr, i1 } @cmpxchg_ptr(ptr %ptr, ptr %desired, ptr %new) {
 ; CHECK-LSE-O0-NEXT:    mov x8, x0
 ; CHECK-LSE-O0-NEXT:    mov x0, x1
 ; CHECK-LSE-O0-NEXT:    cas x0, x2, [x8]
-; CHECK-LSE-O0-NEXT:    subs x8, x0, x1
+; CHECK-LSE-O0-NEXT:    cmp x0, x1
 ; CHECK-LSE-O0-NEXT:    cset w1, eq
 ; CHECK-LSE-O0-NEXT:    ret
   %res = cmpxchg ptr %ptr, ptr %desired, ptr %new monotonic monotonic
diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
index abc67eec32391..895a3762aa7c9 100644
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --debugify-and-strip-all-safe=0 -mtriple=arm64-- -O0 -debug-pass=Structure < %s -o /dev/null 2>&1 | \
 ; RUN:     grep -v "Verify generated machine code" | FileCheck %s
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
index 1d33545cb171a..d8cb60b6919b2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
@@ -263,18 +263,18 @@ entry:
 define bfloat @sitofp_bf_i164(i64 %a) nounwind ssp {
 ; CHECK-LABEL: sitofp_bf_i164:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    subs x8, x0, #0
+; CHECK-NEXT:    cmp x0, #0
 ; CHECK-NEXT:    cneg x10, x0, mi
 ; CHECK-NEXT:    and x8, x10, #0xfffffffffffff000
 ; CHECK-NEXT:    lsr x9, x10, #53
-; CHECK-NEXT:    subs x9, x9, #0
+; CHECK-NEXT:    cmp x9, #0
 ; CHECK-NEXT:    csel x8, x8, x10, ne
 ; CHECK-NEXT:    scvtf d0, x8
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    and x9, x0, #0x8000000000000000
 ; CHECK-NEXT:    orr x8, x8, x9
 ; CHECK-NEXT:    cset w9, ne
-; CHECK-NEXT:    ands x10, x10, #0xfff
+; CHECK-NEXT:    tst x10, #0xfff
 ; CHECK-NEXT:    csel w9, wzr, w9, eq
 ; CHECK-NEXT:    mov w9, w9
 ; CHECK-NEXT:    // kill: def $x9 killed $w9
@@ -381,12 +381,12 @@ define bfloat @uitofp_bf_i64(i64 %a) nounwind ssp {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    and x8, x0, #0xfffffffffffff000
 ; CHECK-NEXT:    lsr x9, x0, #53
-; CHECK-NEXT:    subs x9, x9, #0
+; CHECK-NEXT:    cmp x9, #0
 ; CHECK-NEXT:    csel x8, x8, x0, ne
 ; CHECK-NEXT:    ucvtf d0, x8
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    cset w9, ne
-; CHECK-NEXT:    ands x10, x0, #0xfff
+; CHECK-NEXT:    tst x0, #0xfff
 ; CHECK-NEXT:    csel w9, wzr, w9, eq
 ; CHECK-NEXT:    mov w9, w9
 ; CHECK-NEXT:    // kill: def $x9 killed $w9
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel.ll
index a902f88fce6b9..115696a8bb574 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel.ll
@@ -1,12 +1,8 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
 
 define void @t0(i32 %a) nounwind {
 entry:
-; CHECK-LABEL: t0:
-; CHECK: str {{w[0-9]+}}, [sp, #12]
-; CHECK-NEXT: ldr [[REGISTER:w[0-9]+]], [sp, #12]
-; CHECK-NEXT: str [[REGISTER]], [sp, #12]
-; CHECK: ret
   %a.addr = alloca i32, align 4
   store i32 %a, ptr %a.addr
   %tmp = load i32, ptr %a.addr
@@ -15,11 +11,6 @@ entry:
 }
 
 define void @t1(i64 %a) nounwind {
-; CHECK-LABEL: t1:
-; CHECK: str {{x[0-9]+}}, [sp, #8]
-; CHECK-NEXT: ldr [[REGISTER:x[0-9]+]], [sp, #8]
-; CHECK-NEXT: str [[REGISTER]], [sp, #8]
-; CHECK: ret
   %a.addr = alloca i64, align 4
   store i64 %a, ptr %a.addr
   %tmp = load i64, ptr %a.addr
@@ -29,14 +20,6 @@ define void @t1(i64 %a) nounwind {
 
 define zeroext i1 @i1(i1 %a) nounwind {
 entry:
-; CHECK-LABEL: i1:
-; CHECK: and [[REG:w[0-9]+]], w0, #0x1
-; CHECK: strb [[REG]], [sp, #15]
-; CHECK: ldrb [[REG1:w[0-9]+]], [sp, #15]
-; CHECK: and [[REG2:w[0-9]+]], [[REG1]], #0x1
-; CHECK: and w0, [[REG2]], #0x1
-; CHECK: add sp, sp, #16
-; CHECK: ret
   %a.addr = alloca i1, align 1
   store i1 %a, ptr %a.addr, align 1
   %0 = load i1, ptr %a.addr, align 1
@@ -45,9 +28,6 @@ entry:
 
 define i32 @t2(ptr %ptr) nounwind {
 entry:
-; CHECK-LABEL: t2:
-; CHECK: ldur w0, [x0, #-4]
-; CHECK: ret
   %0 = getelementptr i32, ptr %ptr, i32 -1
   %1 = load i32, ptr %0, align 4
   ret i32 %1
@@ -55,9 +35,6 @@ entry:
 
 define i32 @t3(ptr %ptr) nounwind {
 entry:
-; CHECK-LABEL: t3:
-; CHECK: ldur w0, [x0, #-256]
-; CHECK: ret
   %0 = getelementptr i32, ptr %ptr, i32 -64
   %1 = load i32, ptr %0, align 4
   ret i32 %1
@@ -65,9 +42,6 @@ entry:
 
 define void @t4(ptr %ptr) nounwind {
 entry:
-; CHECK-LABEL: t4:
-; CHECK: stur wzr, [x0, #-4]
-; CHECK: ret
   %0 = getelementptr i32, ptr %ptr, i32 -1
   store i32 0, ptr %0, align 4
   ret void
@@ -75,17 +49,12 @@ entry:
 
 define void @t5(ptr %ptr) nounwind {
 entry:
-; CHECK-LABEL: t5:
-; CHECK: stur wzr, [x0, #-256]
-; CHECK: ret
   %0 = getelementptr i32, ptr %ptr, i32 -64
   store i32 0, ptr %0, align 4
   ret void
 }
 
 define void @t6() nounwind {
-; CHECK-LABEL: t6:
-; CHECK: brk #0x1
   tail call void @llvm.trap()
   ret void
 }
@@ -106,9 +75,6 @@ entry:
 }
 
 define i64 @mul_umul(i64 %arg) {
-; CHECK-LABEL: mul_umul:
-; CHECK: mul x{{[0-9]+}}, [[ARG1:x[0-9]+]], [[ARG2:x[0-9]+]]
-; CHECK-NEXT: umulh x{{[0-9]+}}, [[ARG1]], [[ARG2]]
 entry:
   %sub.ptr.div = sdiv exact i64 %arg, 8
   %tmp = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %sub.ptr.div, i64 8)
@@ -121,9 +87,6 @@ declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64)
 define void @logicalReg(i1 %arg) {
 ; Make sure we generate a logical reg = reg, reg instruction without any
 ; machine verifier errors.
-; CHECK-LABEL: logicalReg:
-; CHECK: orr w{{[0-9]+}}, w{{[0-9]+}}, w{{[0-9]+}}
-; CHECK: ret
 entry:
   br i1 %arg, label %cond.end, label %cond.false
 
@@ -136,3 +99,5 @@ cond.end:
   ret void
 }
 
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
index 71e0250b36972..026d877b670f5 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
@@ -17,21 +17,21 @@ define i8 @test_rmw_add_8(ptr %dst)   {
 ; NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; NOLSE-NEXT:    // Child Loop BB0_2 Depth 2
 ; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
-; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
-; NOLSE-NEXT:    add w12, w9, #1
+; NOLSE-NEXT:    ldr x10, [sp, #16] // 8-byte Folded Reload
+; NOLSE-NEXT:    add w11, w9, #1
 ; NOLSE-NEXT:  .LBB0_2: // %atomicrmw.start
 ; NOLSE-NEXT:    // Parent Loop BB0_1 Depth=1
 ; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; NOLSE-NEXT:    ldaxrb w8, [x11]
+; NOLSE-NEXT:    ldaxrb w8, [x10]
 ; NOLSE-NEXT:    cmp w8, w9, uxtb
 ; NOLSE-NEXT:    b.ne .LBB0_4
 ; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
 ; NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=2
-; NOLSE-NEXT:    stlxrb w10, w12, [x11]
-; NOLSE-NEXT:    cbnz w10, .LBB0_2
+; NOLSE-NEXT:    stlxrb wzr, w11, [x10]
+; NOLSE-NEXT:    cbnz wzr, .LBB0_2
 ; NOLSE-NEXT:  .LBB0_4: // %atomicrmw.start
 ; NOLSE-NEXT:    // in Loop: Header=BB0_1 Depth=1
-; NOLSE-NEXT:    subs w9, w8, w9, uxtb
+; NOLSE-NEXT:    cmp w8, w9, uxtb
 ; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
 ; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
 ; NOLSE-NEXT:    b.ne .LBB0_1
@@ -64,21 +64,21 @@ define i16 @test_rmw_add_16(ptr %dst)   {
 ; NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; NOLSE-NEXT:    // Child Loop BB1_2 Depth 2
 ; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
-; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
-; NOLSE-NEXT:    add w12, w9, #1
+; NOLSE-NEXT:    ldr x10, [sp, #16] // 8-byte Folded Reload
+; NOLSE-NEXT:    add w11, w9, #1
 ; NOLSE-NEXT:  .LBB1_2: // %atomicrmw.start
 ; NOLSE-NEXT:    // Parent Loop BB1_1 Depth=1
 ; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; NOLSE-NEXT:    ldaxrh w8, [x11]
+; NOLSE-NEXT:    ldaxrh w8, [x10]
 ; NOLSE-NEXT:    cmp w8, w9, uxth
 ; NOLSE-NEXT:    b.ne .LBB1_4
 ; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
 ; NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=2
-; NOLSE-NEXT:    stlxrh w10, w12, [x11]
-; NOLSE-NEXT:    cbnz w10, .LBB1_2
+; NOLSE-NEXT:    stlxrh wzr, w11, [x10]
+; NOLSE-NEXT:    cbnz wzr, .LBB1_2
 ; NOLSE-NEXT:  .LBB1_4: // %atomicrmw.start
 ; NOLSE-NEXT:    // in Loop: Header=BB1_1 Depth=1
-; NOLSE-NEXT:    subs w9, w8, w9, uxth
+; NOLSE-NEXT:    cmp w8, w9, uxth
 ; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
 ; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
 ; NOLSE-NEXT:    b.ne .LBB1_1
@@ -111,21 +111,21 @@ define i32 @test_rmw_add_32(ptr %dst)   {
 ; NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; NOLSE-NEXT:    // Child Loop BB2_2 Depth 2
 ; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
-; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
-; NOLSE-NEXT:    add w12, w9, #1
+; NOLSE-NEXT:    ldr x10, [sp, #16] // 8-byte Folded Reload
+; NOLSE-NEXT:    add w11, w9, #1
 ; NOLSE-NEXT:  .LBB2_2: // %atomicrmw.start
 ; NOLSE-NEXT:    // Parent Loop BB2_1 Depth=1
 ; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; NOLSE-NEXT:    ldaxr w8, [x11]
+; NOLSE-NEXT:    ldaxr w8, [x10]
 ; NOLSE-NEXT:    cmp w8, w9
 ; NOLSE-NEXT:    b.ne .LBB2_4
 ; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
 ; NOLSE-NEXT:    // in Loop: Header=BB2_2 Depth=2
-; NOLSE-NEXT:    stlxr w10, w12, [x11]
-; NOLSE-NEXT:    cbnz w10, .LBB2_2
+; NOLSE-NEXT:    stlxr wzr, w11, [x10]
+; NOLSE-NEXT:    cbnz wzr, .LBB2_2
 ; NOLSE-NEXT:  .LBB2_4: // %atomicrmw.start
 ; NOLSE-NEXT:    // in Loop: Header=BB2_1 Depth=1
-; NOLSE-NEXT:    subs w9, w8, w9
+; NOLSE-NEXT:    cmp w8, w9
 ; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
 ; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
 ; NOLSE-NEXT:    b.ne .LBB2_1
@@ -158,21 +158,21 @@ define i64 @test_rmw_add_64(ptr %dst)   {
 ; NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; NOLSE-NEXT:    // Child Loop BB3_2 Depth 2
 ; NOLSE-NEXT:    ldr x9, [sp, #24] // 8-byte Folded Reload
-; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
-; NOLSE-NEXT:    add x12, x9, #1
+; NOLSE-NEXT:    ldr x10, [sp, #16] // 8-byte Folded Reload
+; NOLSE-NEXT:    add x11, x9, #1
 ; NOLSE-NEXT:  .LBB3_2: // %atomicrmw.start
 ; NOLSE-NEXT:    // Parent Loop BB3_1 Depth=1
 ; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; NOLSE-NEXT:    ldaxr x8, [x11]
+; NOLSE-NEXT:    ldaxr x8, [x10]
 ; NOLSE-NEXT:    cmp x8, x9
 ; NOLSE-NEXT:    b.ne .LBB3_4
 ; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
 ; NOLSE-NEXT:    // in Loop: Header=BB3_2 Depth=2
-; NOLSE-NEXT:    stlxr w10, x12, [x11]
-; NOLSE-NEXT:    cbnz w10, .LBB3_2
+; NOLSE-NEXT:    stlxr wzr, x11, [x10]
+; NOLSE-NEXT:    cbnz wzr, .LBB3_2
 ; NOLSE-NEXT:  .LBB3_4: // %atomicrmw.start
 ; NOLSE-NEXT:    // in Loop: Header=BB3_1 Depth=1
-; NOLSE-NEXT:    subs x9, x8, x9
+; NOLSE-NEXT:    cmp x8, x9
 ; NOLSE-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
 ; NOLSE-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
 ; NOLSE-NEXT:    b.ne .LBB3_1
@@ -236,7 +236,7 @@ define i128 @test_rmw_add_128(ptr %dst)   {
 ; NOLSE-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
 ; NOLSE-NEXT:    mov x9, x10
 ; NOLSE-NEXT:    str x9, [sp, #16] // 8-byte Folded Spill
-; NOLSE-NEXT:    subs x12, x12, x13
+; NOLSE-NEXT:    cmp x12, x13
 ; NOLSE-NEXT:    ccmp x10, x11, #0, eq
 ; NOLSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
 ; NOLSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
@@ -274,7 +274,7 @@ define i128 @test_rmw_add_128(ptr %dst)   {
 ; LSE-NEXT:    str x9, [sp, #8] // 8-byte Folded Spill
 ; LSE-NEXT:    mov x8, x1
 ; LSE-NEXT:    str x8, [sp, #16] // 8-byte Folded Spill
-; LSE-NEXT:    subs x11, x8, x11
+; LSE-NEXT:    cmp x8, x11
 ; LSE-NEXT:    ccmp x9, x10, #0, eq
 ; LSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
 ; LSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
@@ -302,22 +302,22 @@ define i8 @test_rmw_nand_8(ptr %dst)   {
 ; NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; NOLSE-NEXT:    // Child Loop BB5_2 Depth 2
 ; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
-; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
+; NOLSE-NEXT:    ldr x10, [sp, #16] // 8-byte Folded Reload
 ; NOLSE-NEXT:    mvn w8, w9
-; NOLSE-NEXT:    orr w12, w8, #0xfffffffe
+; NOLSE-NEXT:    orr w11, w8, #0xfffffffe
 ; NOLSE-NEXT:  .LBB5_2: // %atomicrmw.start
 ; NOLSE-NEXT:    // Parent Loop BB5_1 Depth=1
 ; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; NOLSE-NEXT:    ldaxrb w8, [x11]
+; NOLSE-NEXT:    ldaxrb w8, [x10]
 ; NOLSE-NEXT:    cmp w8, w9, uxtb
 ; NOLSE-NEXT:    b.ne .LBB5_4
 ; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
 ; NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=2
-; NOLSE-NEXT:    stlxrb w10, w12, [x11]
-; NOLSE-NEXT:    cbnz w10, .LBB5_2
+; NOLSE-NEXT:    stlxrb wzr, w11, [x10]
+; NOLSE-NEXT:    cbnz wzr, .LBB5_2
 ; NOLSE-NEXT:  .LBB5_4: // %atomicrmw.start
 ; NOLSE-NEXT:    // in Loop: Header=BB5_1 Depth=1
-; NOLSE-NEXT:    subs w9, w8, w9, uxtb
+; NOLSE-NEXT:    cmp w8, w9, uxtb
 ; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
 ; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
 ; NOLSE-NEXT:    b.ne .LBB5_1
@@ -343,7 +343,7 @@ define i8 @test_rmw_nand_8(ptr %dst)   {
 ; LSE-NEXT:    orr w10, w8, #0xfffffffe
 ; LSE-NEXT:    mov w8, w9
 ; LSE-NEXT:    casalb w8, w10, [x11]
-; LSE-NEXT:    subs w9, w8, w9, uxtb
+; LSE-NEXT:    cmp w8, w9, uxtb
 ; LSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
 ; LSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
 ; LSE-NEXT:    b.ne .LBB5_1
@@ -370,22 +370,22 @@ define i16 @test_rmw_nand_16(ptr %dst)   {
 ; NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; NOLSE-NEXT:    // Child Loop BB6_2 Depth 2
 ; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
-; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
+; NOLSE-NEXT:    ldr x10, [sp, #16] // 8-byte Folded Reload
 ; NOLSE-NEXT:    mvn w8, w9
-; NOLSE-NEXT:    orr w12, w8, #0xfffffffe
+; NOLSE-NEXT:    orr w11, w8, #0xfffffffe
 ; NOLSE-NEXT:  .LBB6_2: // %atomicrmw.start
 ; NOLSE-NEXT:    // Parent Loop BB6_1 Depth=1
 ; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; NOLSE-NEXT:    ldaxrh w8, [x11]
+; NOLSE-NEXT:    ldaxrh w8, [x10]
 ; NOLSE-NEXT:    cmp w8, w9, uxth
 ; NOLSE-NEXT:    b.ne .LBB6_4
 ; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
 ; NOLSE-NEXT:    // in Loop: Header=BB6_2 Depth=2
-; NOLSE-NEXT:    stlxrh w10, w12, [x11]
-; NOLSE-NEXT:    cbnz w10, .LBB6_2
+; NOLSE-NEXT:    stlxrh wzr, w11, [x10]
+; NOLSE-NEXT:    cbnz wzr, .LBB6_2
 ; NOLSE-NEXT:  .LBB6_4: // %atomicrmw.start
 ; NOLSE-NEXT:    // in Loop: Header=BB6_1 Depth=1
-; NOLSE-NEXT:    subs w9, w8, w9, uxth
+; NOLSE-NEXT:    cmp w8, w9, uxth
 ; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
 ; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
 ; NOLSE-NEXT:    b.ne .LBB6_1
@@ -411,7 +411,7 @@ define i16 @test_rmw_nand_16(ptr %dst)   {
 ; LSE-NEXT:    orr w10, w8, #0xfffffffe
 ; LSE-NEXT:    mov w8, w9
 ; LSE-NEXT:    casalh w8, w10, [x11]
-; LSE-NEXT:    subs w9, w8, w9, uxth
+; LSE-NEXT:    cmp w8, w9, uxth
 ; LSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
 ; LSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
 ; LSE-NEXT:    b.ne .LBB6_1
@@ -438,22 +438,22 @@ define i32 @test_rmw_nand_32(ptr %dst)   {
 ; NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; NOLSE-NEXT:    // Child Loop BB7_2 Depth 2
 ; NOLSE-NEXT:    ldr w9, [sp, #28] // 4-byte Folded Reload
-; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
+; NOLSE-NEXT:    ldr x10, [sp, #16] // 8-byte Folded Reload
 ; NOLSE-NEXT:    mvn w8, w9
-; NOLSE-NEXT:    orr w12, w8, #0xfffffffe
+; NOLSE-NEXT:    orr w11, w8, #0xfffffffe
 ; NOLSE-NEXT:  .LBB7_2: // %atomicrmw.start
 ; NOLSE-NEXT:    // Parent Loop BB7_1 Depth=1
 ; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; NOLSE-NEXT:    ldaxr w8, [x11]
+; NOLSE-NEXT:    ldaxr w8, [x10]
 ; NOLSE-NEXT:    cmp w8, w9
 ; NOLSE-NEXT:    b.ne .LBB7_4
 ; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
 ; NOLSE-NEXT:    // in Loop: Header=BB7_2 Depth=2
-; NOLSE-NEXT:    stlxr w10, w12, [x11]
-; NOLSE-NEXT:    cbnz w10, .LBB7_2
+; NOLSE-NEXT:    stlxr wzr, w11, [x10]
+; NOLSE-NEXT:    cbnz wzr, .LBB7_2
 ; NOLSE-NEXT:  .LBB7_4: // %atomicrmw.start
 ; NOLSE-NEXT:    // in Loop: Header=BB7_1 Depth=1
-; NOLSE-NEXT:    subs w9, w8, w9
+; NOLSE-NEXT:    cmp w8, w9
 ; NOLSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
 ; NOLSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
 ; NOLSE-NEXT:    b.ne .LBB7_1
@@ -479,7 +479,7 @@ define i32 @test_rmw_nand_32(ptr %dst)   {
 ; LSE-NEXT:    orr w10, w8, #0xfffffffe
 ; LSE-NEXT:    mov w8, w9
 ; LSE-NEXT:    casal w8, w10, [x11]
-; LSE-NEXT:    subs w9, w8, w9
+; LSE-NEXT:    cmp w8, w9
 ; LSE-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
 ; LSE-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
 ; LSE-NEXT:    b.ne .LBB7_1
@@ -506,25 +506,25 @@ define i64 @test_rmw_nand_64(ptr %dst)   {
 ; NOLSE-NEXT:    // =>This Loop Header: Depth=1
 ; NOLSE-NEXT:    // Child Loop BB8_2 Depth 2
 ; NOLSE-NEXT:    ldr x9, [sp, #24] // 8-byte Folded Reload
-; NOLSE-NEXT:    ldr x11, [sp, #16] // 8-byte Folded Reload
+; NOLSE-NEXT:    ldr x10, [sp, #16] // 8-byte Folded Reload
 ; NOLSE-NEXT:    mov w8, w9
-; NOLSE-NEXT:    mvn w10, w8
+; NOLSE-NEXT:    mvn w11, w8
 ; NOLSE-NEXT:    // implicit-def: $x8
-; NOLSE-NEXT:    mov w8, w10
-; NOLSE-NEXT:    orr x12, x8, #0xfffffffffffffffe
+; NOLSE-NEXT:    mov w8, w11
+; NOLSE-NEXT:    orr x11, x8, #0xfffffffffffffffe
 ; NOLSE-NEXT:  .LBB8_2: // %atomicrmw.start
 ; NOLSE-NEXT:    // Parent Loop BB8_1 Depth=1
 ; NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
-; NOLSE-NEXT:    ldaxr x8, [x11]
+; NOLSE-NEXT:    ldaxr x8, [x10]
 ; NOLSE-NEXT:    cmp x8, x9
 ; NOLSE-NEXT:    b.ne .LBB8_4
 ; NOLSE-NEXT:  // %bb.3: // %atomicrmw.start
 ; NOLSE-NEXT:    // in Loop: Header=BB8_2 Depth=2
-; NOLSE-NEXT:    stlxr w10, x12, [x11]
-; NOLSE-NEXT:    cbnz w10, .LBB8_2
+; NOLSE-NEXT:    stlxr wzr, x11, [x10]
+; NOLSE-NEXT:    cbnz wzr, .LBB8_2
 ; NOLSE-NEXT:  .LBB8_4: // %atomicrmw.start
 ; NOLSE-NEXT:    // in Loop: Header=BB8_1 Depth=1
-; NOLSE-NEXT:    subs x9, x8, x9
+; NOLSE-NEXT:    cmp x8, x9
 ; NOLSE-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
 ; NOLSE-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
 ; NOLSE-NEXT:    b.ne .LBB8_1
@@ -553,7 +553,7 @@ define i64 @test_rmw_nand_64(ptr %dst)   {
 ; LSE-NEXT:    orr x10, x8, #0xfffffffffffffffe
 ; LSE-NEXT:    mov x8, x9
 ; LSE-NEXT:    casal x8, x10, [x11]
-; LSE-NEXT:    subs x9, x8, x9
+; LSE-NEXT:    cmp x8, x9
 ; LSE-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
 ; LSE-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
 ; LSE-NEXT:    b.ne .LBB8_1
@@ -614,7 +614,7 @@ define i128 @test_rmw_nand_128(ptr %dst)   {
 ; NOLSE-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
 ; NOLSE-NEXT:    mov x9, x10
 ; NOLSE-NEXT:    str x9, [sp, #16] // 8-byte Folded Spill
-; NOLSE-NEXT:    subs x12, x12, x13
+; NOLSE-NEXT:    cmp x12, x13
 ; NOLSE-NEXT:    ccmp x10, x11, #0, eq
 ; NOLSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
 ; NOLSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
@@ -656,7 +656,7 @@ define i128 @test_rmw_nand_128(ptr %dst)   {
 ; LSE-NEXT:    str x9, [sp, #8] // 8-byte Folded Spill
 ; LSE-NEXT:    mov x8, x1
 ; LSE-NEXT:    str x8, [sp, #16] // 8-byte Folded Spill
-; LSE-NEXT:    subs x11, x8, x11
+; LSE-NEXT:    cmp x8, x11
 ; LSE-NEXT:    ccmp x9, x10, #0, eq
 ; LSE-NEXT:    str x9, [sp, #32] // 8-byte Folded Spill
 ; LSE-NEXT:    str x8, [sp, #40] // 8-byte Folded Spill
diff --git a/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll b/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll
index e0ffd81717402..32395feb34049 100644
--- a/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll
+++ b/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s
 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs -O0 -fast-isel=0 -global-isel=false < %s | FileCheck --check-prefix=CHECK --check-prefix=NOOPT %s
 
@@ -14,6 +15,54 @@ declare void @foo()
 ; NOOPT:       subs w{{[0-9]+}}, w{{[0-9]+}}, #2
 ; CHECK:       b.hi [[L]]
 define void @test_and_not(i32 %a, i32 %b, i32 %c) {
+; OPT-LABEL: test_and_not:
+; OPT:       // %bb.0: // %bb1
+; OPT-NEXT:    cbz w0, .LBB0_4
+; OPT-NEXT:  // %bb.1: // %bb1
+; OPT-NEXT:    cmp w1, #2
+; OPT-NEXT:    b.lo .LBB0_4
+; OPT-NEXT:  // %bb.2: // %bb1
+; OPT-NEXT:    cmp w2, #2
+; OPT-NEXT:    b.hi .LBB0_4
+; OPT-NEXT:  // %bb.3: // %common.ret
+; OPT-NEXT:    ret
+; OPT-NEXT:  .LBB0_4: // %bb3
+; OPT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OPT-NEXT:    .cfi_def_cfa_offset 16
+; OPT-NEXT:    .cfi_offset w30, -16
+; OPT-NEXT:    bl foo
+; OPT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OPT-NEXT:    ret
+;
+; NOOPT-LABEL: test_and_not:
+; NOOPT:       // %bb.0: // %bb1
+; NOOPT-NEXT:    sub sp, sp, #32
+; NOOPT-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; NOOPT-NEXT:    .cfi_def_cfa_offset 32
+; NOOPT-NEXT:    .cfi_offset w30, -16
+; NOOPT-NEXT:    str w2, [sp, #8] // 4-byte Folded Spill
+; NOOPT-NEXT:    str w1, [sp, #12] // 4-byte Folded Spill
+; NOOPT-NEXT:    cbz w0, .LBB0_4
+; NOOPT-NEXT:    b .LBB0_1
+; NOOPT-NEXT:  .LBB0_1: // %bb1
+; NOOPT-NEXT:    ldr w8, [sp, #12] // 4-byte Folded Reload
+; NOOPT-NEXT:    cmp w8, #2
+; NOOPT-NEXT:    b.lo .LBB0_4
+; NOOPT-NEXT:    b .LBB0_2
+; NOOPT-NEXT:  .LBB0_2: // %bb1
+; NOOPT-NEXT:    ldr w8, [sp, #8] // 4-byte Folded Reload
+; NOOPT-NEXT:    cmp w8, #2
+; NOOPT-NEXT:    b.hi .LBB0_4
+; NOOPT-NEXT:    b .LBB0_3
+; NOOPT-NEXT:  .LBB0_3: // %bb2
+; NOOPT-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; NOOPT-NEXT:    add sp, sp, #32
+; NOOPT-NEXT:    ret
+; NOOPT-NEXT:  .LBB0_4: // %bb3
+; NOOPT-NEXT:    bl foo
+; NOOPT-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; NOOPT-NEXT:    add sp, sp, #32
+; NOOPT-NEXT:    ret
 bb1:
   %cmp1 = icmp ult i32 %a, 1
   %cmp2 = icmp ult i32 %b, 2
@@ -41,6 +90,54 @@ bb3:
 ; NOOPT:       subs w{{[0-9]+}}, w{{[0-9]+}}, #2
 ; CHECK:       b.hi [[L]]
 define void @test_and_not2(i32 %a, i32 %b, i32 %c) {
+; OPT-LABEL: test_and_not2:
+; OPT:       // %bb.0: // %bb1
+; OPT-NEXT:    cbz w0, .LBB1_4
+; OPT-NEXT:  // %bb.1: // %bb1
+; OPT-NEXT:    cmp w1, #2
+; OPT-NEXT:    b.lo .LBB1_4
+; OPT-NEXT:  // %bb.2: // %bb1
+; OPT-NEXT:    cmp w2, #2
+; OPT-NEXT:    b.hi .LBB1_4
+; OPT-NEXT:  // %bb.3: // %common.ret
+; OPT-NEXT:    ret
+; OPT-NEXT:  .LBB1_4: // %bb3
+; OPT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OPT-NEXT:    .cfi_def_cfa_offset 16
+; OPT-NEXT:    .cfi_offset w30, -16
+; OPT-NEXT:    bl foo
+; OPT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OPT-NEXT:    ret
+;
+; NOOPT-LABEL: test_and_not2:
+; NOOPT:       // %bb.0: // %bb1
+; NOOPT-NEXT:    sub sp, sp, #32
+; NOOPT-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; NOOPT-NEXT:    .cfi_def_cfa_offset 32
+; NOOPT-NEXT:    .cfi_offset w30, -16
+; NOOPT-NEXT:    str w2, [sp, #8] // 4-byte Folded Spill
+; NOOPT-NEXT:    str w1, [sp, #12] // 4-byte Folded Spill
+; NOOPT-NEXT:    cbz w0, .LBB1_4
+; NOOPT-NEXT:    b .LBB1_1
+; NOOPT-NEXT:  .LBB1_1: // %bb1
+; NOOPT-NEXT:    ldr w8, [sp, #12] // 4-byte Folded Reload
+; NOOPT-NEXT:    cmp w8, #2
+; NOOPT-NEXT:    b.lo .LBB1_4
+; NOOPT-NEXT:    b .LBB1_2
+; NOOPT-NEXT:  .LBB1_2: // %bb1
+; NOOPT-NEXT:    ldr w8, [sp, #8] // 4-byte Folded Reload
+; NOOPT-NEXT:    cmp w8, #2
+; NOOPT-NEXT:    b.hi .LBB1_4
+; NOOPT-NEXT:    b .LBB1_3
+; NOOPT-NEXT:  .LBB1_3: // %bb2
+; NOOPT-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; NOOPT-NEXT:    add sp, sp, #32
+; NOOPT-NEXT:    ret
+; NOOPT-NEXT:  .LBB1_4: // %bb3
+; NOOPT-NEXT:    bl foo
+; NOOPT-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; NOOPT-NEXT:    add sp, sp, #32
+; NOOPT-NEXT:    ret
 bb1:
   %cmp1 = icmp ult i32 %a, 1
   %cmp2 = icmp ult i32 %b, 2
@@ -74,6 +171,52 @@ bb3:
 ; NOOPT: ldr [[R3:w[0-9]+]], [sp, #[[SLOT2]]]
 ; NOOPT: tbz [[R3]], #0, [[L]]
 define void @test_cmp_other_block(ptr %p, i1 %c) {
+; OPT-LABEL: test_cmp_other_block:
+; OPT:       // %bb.0: // %entry
+; OPT-NEXT:    ldr w8, [x0]
+; OPT-NEXT:    cmp w8, #0
+; OPT-NEXT:    b.gt .LBB2_3
+; OPT-NEXT:  // %bb.1: // %entry
+; OPT-NEXT:    tbz w1, #0, .LBB2_3
+; OPT-NEXT:  // %bb.2: // %common.ret
+; OPT-NEXT:    ret
+; OPT-NEXT:  .LBB2_3: // %bb3
+; OPT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OPT-NEXT:    .cfi_def_cfa_offset 16
+; OPT-NEXT:    .cfi_offset w30, -16
+; OPT-NEXT:    bl foo
+; OPT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OPT-NEXT:    ret
+;
+; NOOPT-LABEL: test_cmp_other_block:
+; NOOPT:       // %bb.0: // %entry
+; NOOPT-NEXT:    sub sp, sp, #32
+; NOOPT-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; NOOPT-NEXT:    .cfi_def_cfa_offset 32
+; NOOPT-NEXT:    .cfi_offset w30, -16
+; NOOPT-NEXT:    str w1, [sp, #8] // 4-byte Folded Spill
+; NOOPT-NEXT:    ldr w8, [x0]
+; NOOPT-NEXT:    cmp w8, #0
+; NOOPT-NEXT:    cset w8, gt
+; NOOPT-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
+; NOOPT-NEXT:    b .LBB2_1
+; NOOPT-NEXT:  .LBB2_1: // %bb1
+; NOOPT-NEXT:    ldr w8, [sp, #12] // 4-byte Folded Reload
+; NOOPT-NEXT:    tbnz w8, #0, .LBB2_4
+; NOOPT-NEXT:    b .LBB2_2
+; NOOPT-NEXT:  .LBB2_2: // %bb1
+; NOOPT-NEXT:    ldr w8, [sp, #8] // 4-byte Folded Reload
+; NOOPT-NEXT:    tbz w8, #0, .LBB2_4
+; NOOPT-NEXT:    b .LBB2_3
+; NOOPT-NEXT:  .LBB2_3: // %bb2
+; NOOPT-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; NOOPT-NEXT:    add sp, sp, #32
+; NOOPT-NEXT:    ret
+; NOOPT-NEXT:  .LBB2_4: // %bb3
+; NOOPT-NEXT:    bl foo
+; NOOPT-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; NOOPT-NEXT:    add sp, sp, #32
+; NOOPT-NEXT:    ret
 entry:
   %l = load i32, ptr %p
   %cmp = icmp sgt i32 %l, 0
@@ -92,3 +235,5 @@ bb3:
   ret void
 }
 
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
index 1bf1477b79ced..d92d773c30001 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
@@ -1,88 +1,202 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck -enable-var-scope %s
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false -mattr=+outline-atomics %s -o - | FileCheck -enable-var-scope %s --check-prefix=OUTLINE-ATOMICS
 
 define { i8, i1 } @test_cmpxchg_8(ptr %addr, i8 %desired, i8 %new) nounwind {
-; OUTLINE-ATOMICS: bl __aarch64_cas1_acq_rel
 ; CHECK-LABEL: test_cmpxchg_8:
-; CHECK:     mov [[ADDR:x[0-9]+]], x0
-; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     ldaxrb [[OLD:w[0-9]+]], [[[ADDR]]]
-; CHECK:     cmp [[OLD]], w1, uxtb
-; CHECK:     b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     stlxrb [[STATUS:w[0-9]+]], w2, [[[ADDR]]]
-; CHECK:     cbnz [[STATUS]], [[RETRY]]
-; CHECK: [[DONE]]:
-; CHECK:     subs {{w[0-9]+}}, [[OLD]], w1, uxtb
-; CHECK:     cset {{w[0-9]+}}, eq
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:  .LBB0_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldaxrb w0, [x8]
+; CHECK-NEXT:    cmp w0, w1, uxtb
+; CHECK-NEXT:    b.ne .LBB0_3
+; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    stlxrb wzr, w2, [x8]
+; CHECK-NEXT:    cbnz wzr, .LBB0_1
+; CHECK-NEXT:  .LBB0_3:
+; CHECK-NEXT:    cmp w0, w1, uxtb
+; CHECK-NEXT:    cset w1, eq
+; CHECK-NEXT:    ret
+;
+; OUTLINE-ATOMICS-LABEL: test_cmpxchg_8:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    sub sp, sp, #32
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    str w2, [sp, #8] // 4-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mov w8, w1
+; OUTLINE-ATOMICS-NEXT:    ldr w1, [sp, #8] // 4-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mov x2, x0
+; OUTLINE-ATOMICS-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas1_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr w1, [sp, #12] // 4-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w1, uxtb
+; OUTLINE-ATOMICS-NEXT:    cset w1, eq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    add sp, sp, #32
+; OUTLINE-ATOMICS-NEXT:    ret
   %res = cmpxchg ptr %addr, i8 %desired, i8 %new seq_cst monotonic
   ret { i8, i1 } %res
 }
 
 define { i16, i1 } @test_cmpxchg_16(ptr %addr, i16 %desired, i16 %new) nounwind {
-; OUTLINE-ATOMICS: bl __aarch64_cas2_acq_rel
 ; CHECK-LABEL: test_cmpxchg_16:
-; CHECK:     mov [[ADDR:x[0-9]+]], x0
-; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     ldaxrh [[OLD:w[0-9]+]], [[[ADDR]]]
-; CHECK:     cmp [[OLD]], w1, uxth
-; CHECK:     b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     stlxrh [[STATUS:w[3-9]]], w2, [[[ADDR]]]
-; CHECK:     cbnz [[STATUS]], [[RETRY]]
-; CHECK: [[DONE]]:
-; CHECK:     subs {{w[0-9]+}}, [[OLD]], w1
-; CHECK:     cset {{w[0-9]+}}, eq
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:  .LBB1_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldaxrh w0, [x8]
+; CHECK-NEXT:    cmp w0, w1, uxth
+; CHECK-NEXT:    b.ne .LBB1_3
+; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB1_1 Depth=1
+; CHECK-NEXT:    stlxrh wzr, w2, [x8]
+; CHECK-NEXT:    cbnz wzr, .LBB1_1
+; CHECK-NEXT:  .LBB1_3:
+; CHECK-NEXT:    cmp w0, w1, uxth
+; CHECK-NEXT:    cset w1, eq
+; CHECK-NEXT:    ret
+;
+; OUTLINE-ATOMICS-LABEL: test_cmpxchg_16:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    sub sp, sp, #32
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    str w2, [sp, #8] // 4-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mov w8, w1
+; OUTLINE-ATOMICS-NEXT:    ldr w1, [sp, #8] // 4-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mov x2, x0
+; OUTLINE-ATOMICS-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas2_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr w1, [sp, #12] // 4-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w1, uxth
+; OUTLINE-ATOMICS-NEXT:    cset w1, eq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    add sp, sp, #32
+; OUTLINE-ATOMICS-NEXT:    ret
   %res = cmpxchg ptr %addr, i16 %desired, i16 %new seq_cst monotonic
   ret { i16, i1 } %res
 }
 
 define { i32, i1 } @test_cmpxchg_32(ptr %addr, i32 %desired, i32 %new) nounwind {
-; OUTLINE-ATOMICS: bl __aarch64_cas4_acq_rel
 ; CHECK-LABEL: test_cmpxchg_32:
-; CHECK:     mov [[ADDR:x[0-9]+]], x0
-; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     ldaxr [[OLD:w[0-9]+]], [[[ADDR]]]
-; CHECK:     cmp [[OLD]], w1
-; CHECK:     b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     stlxr [[STATUS:w[0-9]+]], w2, [[[ADDR]]]
-; CHECK:     cbnz [[STATUS]], [[RETRY]]
-; CHECK: [[DONE]]:
-; CHECK:     subs {{w[0-9]+}}, [[OLD]], w1
-; CHECK:     cset {{w[0-9]+}}, eq
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:  .LBB2_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldaxr w0, [x8]
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    b.ne .LBB2_3
+; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB2_1 Depth=1
+; CHECK-NEXT:    stlxr wzr, w2, [x8]
+; CHECK-NEXT:    cbnz wzr, .LBB2_1
+; CHECK-NEXT:  .LBB2_3:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    cset w1, eq
+; CHECK-NEXT:    ret
+;
+; OUTLINE-ATOMICS-LABEL: test_cmpxchg_32:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    sub sp, sp, #32
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    str w2, [sp, #8] // 4-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mov w8, w1
+; OUTLINE-ATOMICS-NEXT:    ldr w1, [sp, #8] // 4-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mov x2, x0
+; OUTLINE-ATOMICS-NEXT:    ldr w0, [sp, #12] // 4-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas4_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr w1, [sp, #12] // 4-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    cmp w0, w1
+; OUTLINE-ATOMICS-NEXT:    cset w1, eq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    add sp, sp, #32
+; OUTLINE-ATOMICS-NEXT:    ret
   %res = cmpxchg ptr %addr, i32 %desired, i32 %new seq_cst monotonic
   ret { i32, i1 } %res
 }
 
 define { i64, i1 } @test_cmpxchg_64(ptr %addr, i64 %desired, i64 %new) nounwind {
-; OUTLINE-ATOMICS: bl __aarch64_cas8_acq_rel
 ; CHECK-LABEL: test_cmpxchg_64:
-; CHECK:     mov [[ADDR:x[0-9]+]], x0
-; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     ldaxr [[OLD:x[0-9]+]], [[[ADDR]]]
-; CHECK:     cmp [[OLD]], x1
-; CHECK:     b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     stlxr [[STATUS:w[0-9]+]], x2, [[[ADDR]]]
-; CHECK:     cbnz [[STATUS]], [[RETRY]]
-; CHECK: [[DONE]]:
-; CHECK:     subs {{x[0-9]+}}, [[OLD]], x1
-; CHECK:     cset {{w[0-9]+}}, eq
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:  .LBB3_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldaxr x0, [x8]
+; CHECK-NEXT:    cmp x0, x1
+; CHECK-NEXT:    b.ne .LBB3_3
+; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB3_1 Depth=1
+; CHECK-NEXT:    stlxr wzr, x2, [x8]
+; CHECK-NEXT:    cbnz wzr, .LBB3_1
+; CHECK-NEXT:  .LBB3_3:
+; CHECK-NEXT:    cmp x0, x1
+; CHECK-NEXT:    cset w1, eq
+; CHECK-NEXT:    ret
+;
+; OUTLINE-ATOMICS-LABEL: test_cmpxchg_64:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    sub sp, sp, #32
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    str x2, [sp] // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mov x8, x1
+; OUTLINE-ATOMICS-NEXT:    ldr x1, [sp] // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mov x2, x0
+; OUTLINE-ATOMICS-NEXT:    ldr x0, [sp, #8] // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas8_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x1, [sp, #8] // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    cmp x0, x1
+; OUTLINE-ATOMICS-NEXT:    cset w1, eq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    add sp, sp, #32
+; OUTLINE-ATOMICS-NEXT:    ret
   %res = cmpxchg ptr %addr, i64 %desired, i64 %new seq_cst monotonic
   ret { i64, i1 } %res
 }
 
 define { i128, i1 } @test_cmpxchg_128(ptr %addr, i128 %desired, i128 %new) nounwind {
-; OUTLINE-ATOMICS: bl __aarch64_cas16_acq_rel
 ; CHECK-LABEL: test_cmpxchg_128:
-; CHECK:     mov [[ADDR:x[0-9]+]], x0
-; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [[[ADDR]]]
-; CHECK:     cmp [[OLD_LO]], x2
-; CHECK:     cset [[CMP_TMP:w[0-9]+]], ne
-; CHECK:     cmp [[OLD_HI]], x3
-; CHECK:     cinc [[CMP:w[0-9]+]], [[CMP_TMP]], ne
-; CHECK:     cbnz [[CMP]], [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     stlxp [[STATUS:w[0-9]+]], x4, x5, [[[ADDR]]]
-; CHECK:     cbnz [[STATUS]], [[RETRY]]
-; CHECK: [[DONE]]:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x9, x0
+; CHECK-NEXT:  .LBB4_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldaxp x0, x1, [x9]
+; CHECK-NEXT:    cmp x0, x2
+; CHECK-NEXT:    cset w8, ne
+; CHECK-NEXT:    cmp x1, x3
+; CHECK-NEXT:    cinc w8, w8, ne
+; CHECK-NEXT:    cbnz w8, .LBB4_3
+; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB4_1 Depth=1
+; CHECK-NEXT:    stlxp w8, x4, x5, [x9]
+; CHECK-NEXT:    cbnz w8, .LBB4_1
+; CHECK-NEXT:    b .LBB4_4
+; CHECK-NEXT:  .LBB4_3: // in Loop: Header=BB4_1 Depth=1
+; CHECK-NEXT:    stlxp w8, x0, x1, [x9]
+; CHECK-NEXT:    cbnz w8, .LBB4_1
+; CHECK-NEXT:  .LBB4_4:
+; CHECK-NEXT:    cmp x0, x2
+; CHECK-NEXT:    ccmp x1, x3, #0, eq
+; CHECK-NEXT:    cset w2, eq
+; CHECK-NEXT:    ret
+;
+; OUTLINE-ATOMICS-LABEL: test_cmpxchg_128:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    sub sp, sp, #48
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    str x5, [sp] // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    str x4, [sp, #8] // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mov x1, x3
+; OUTLINE-ATOMICS-NEXT:    ldr x3, [sp] // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    str x1, [sp, #24] // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mov x8, x2
+; OUTLINE-ATOMICS-NEXT:    ldr x2, [sp, #8] // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    str x8, [sp, #16] // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    mov x4, x0
+; OUTLINE-ATOMICS-NEXT:    ldr x0, [sp, #16] // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x2, [sp, #16] // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ldr x3, [sp, #24] // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    cmp x0, x2
+; OUTLINE-ATOMICS-NEXT:    ccmp x1, x3, #0, eq
+; OUTLINE-ATOMICS-NEXT:    cset w2, eq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    add sp, sp, #48
+; OUTLINE-ATOMICS-NEXT:    ret
   %res = cmpxchg ptr %addr, i128 %desired, i128 %new seq_cst monotonic
   ret { i128, i1 } %res
 }
@@ -92,22 +206,55 @@ define { i128, i1 } @test_cmpxchg_128(ptr %addr, i128 %desired, i128 %new) nounw
 ; was false.
 @var128 = dso_local global i128 0
 define {i128, i1} @test_cmpxchg_128_unsplit(ptr %addr) {
-; OUTLINE-ATOMICS: bl __aarch64_cas16_acq_rel
 ; CHECK-LABEL: test_cmpxchg_128_unsplit:
-; CHECK:     mov [[ADDR:x[0-9]+]], x0
-; CHECK:     add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
-; CHECK:     ldp [[DESIRED_LO:x[0-9]+]], [[DESIRED_HI:x[0-9]+]], [x[[VAR128]]]
-; CHECK:     ldp [[NEW_LO:x[0-9]+]], [[NEW_HI:x[0-9]+]], [x[[VAR128]]]
-; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [[[ADDR]]]
-; CHECK:     cmp [[OLD_LO]], [[DESIRED_LO]]
-; CHECK:     cset [[CMP_TMP:w[0-9]+]], ne
-; CHECK:     cmp [[OLD_HI]], [[DESIRED_HI]]
-; CHECK:     cinc [[CMP:w[0-9]+]], [[CMP_TMP]], ne
-; CHECK:     cbnz [[CMP]], [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     stlxp [[STATUS:w[0-9]+]], [[NEW_LO]], [[NEW_HI]], [[[ADDR]]]
-; CHECK:     cbnz [[STATUS]], [[RETRY]]
-; CHECK: [[DONE]]:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x11, x0
+; CHECK-NEXT:    adrp x10, var128
+; CHECK-NEXT:    add x10, x10, :lo12:var128
+; CHECK-NEXT:    ldp x9, x8, [x10]
+; CHECK-NEXT:    ldp x12, x13, [x10]
+; CHECK-NEXT:  .LBB5_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldaxp x0, x1, [x11]
+; CHECK-NEXT:    cmp x0, x9
+; CHECK-NEXT:    cset w10, ne
+; CHECK-NEXT:    cmp x1, x8
+; CHECK-NEXT:    cinc w10, w10, ne
+; CHECK-NEXT:    cbnz w10, .LBB5_3
+; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB5_1 Depth=1
+; CHECK-NEXT:    stlxp w10, x12, x13, [x11]
+; CHECK-NEXT:    cbnz w10, .LBB5_1
+; CHECK-NEXT:    b .LBB5_4
+; CHECK-NEXT:  .LBB5_3: // in Loop: Header=BB5_1 Depth=1
+; CHECK-NEXT:    stlxp w10, x0, x1, [x11]
+; CHECK-NEXT:    cbnz w10, .LBB5_1
+; CHECK-NEXT:  .LBB5_4:
+; CHECK-NEXT:    cmp x0, x9
+; CHECK-NEXT:    ccmp x1, x8, #0, eq
+; CHECK-NEXT:    cset w2, eq
+; CHECK-NEXT:    ret
+;
+; OUTLINE-ATOMICS-LABEL: test_cmpxchg_128_unsplit:
+; OUTLINE-ATOMICS:       // %bb.0:
+; OUTLINE-ATOMICS-NEXT:    sub sp, sp, #32
+; OUTLINE-ATOMICS-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    .cfi_def_cfa_offset 32
+; OUTLINE-ATOMICS-NEXT:    .cfi_offset w30, -16
+; OUTLINE-ATOMICS-NEXT:    mov x4, x0
+; OUTLINE-ATOMICS-NEXT:    adrp x8, var128
+; OUTLINE-ATOMICS-NEXT:    add x8, x8, :lo12:var128
+; OUTLINE-ATOMICS-NEXT:    ldp x0, x1, [x8]
+; OUTLINE-ATOMICS-NEXT:    str x1, [sp, #8] // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    str x0, [sp] // 8-byte Folded Spill
+; OUTLINE-ATOMICS-NEXT:    ldp x2, x3, [x8]
+; OUTLINE-ATOMICS-NEXT:    bl __aarch64_cas16_acq_rel
+; OUTLINE-ATOMICS-NEXT:    ldr x9, [sp] // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    ldr x8, [sp, #8] // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    cmp x0, x9
+; OUTLINE-ATOMICS-NEXT:    ccmp x1, x8, #0, eq
+; OUTLINE-ATOMICS-NEXT:    cset w2, eq
+; OUTLINE-ATOMICS-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; OUTLINE-ATOMICS-NEXT:    add sp, sp, #32
+; OUTLINE-ATOMICS-NEXT:    ret
 
   %desired = load volatile i128, ptr @var128
   %new = load volatile i128, ptr @var128
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-atomic-fallback.ll b/llvm/test/CodeGen/AArch64/fast-isel-atomic-fallback.ll
index 16ef0cbc8a810..438061e430a80 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-atomic-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-atomic-fallback.ll
@@ -24,17 +24,17 @@ define i32 @cmpxchg_cstexpr_addr(i32 %cmp, i32 %new, ptr %ps) #0 {
 ; CHECK-LABEL: cmpxchg_cstexpr_addr:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    adrp x10, _g at PAGE
-; CHECK-NEXT:    add x10, x10, _g at PAGEOFF
+; CHECK-NEXT:    adrp x9, _g at PAGE
+; CHECK-NEXT:    add x9, x9, _g at PAGEOFF
 ; CHECK-NEXT:  LBB1_1: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldaxr w0, [x10]
+; CHECK-NEXT:    ldaxr w0, [x9]
 ; CHECK-NEXT:    cmp w0, w8
 ; CHECK-NEXT:    b.ne LBB1_3
 ; CHECK-NEXT:  ; %bb.2: ; in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT:    stlxr w9, w1, [x10]
-; CHECK-NEXT:    cbnz w9, LBB1_1
+; CHECK-NEXT:    stlxr wzr, w1, [x9]
+; CHECK-NEXT:    cbnz wzr, LBB1_1
 ; CHECK-NEXT:  LBB1_3:
-; CHECK-NEXT:    subs w8, w0, w8
+; CHECK-NEXT:    cmp w0, w8
 ; CHECK-NEXT:    cset w8, eq
 ; CHECK-NEXT:    ; kill: def $w1 killed $w8
 ; CHECK-NEXT:    str w8, [x2]
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll b/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
index 0adc103be27f7..1e50776c3c9c2 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=aarch64-- -O0 -fast-isel -fast-isel-abort=4 -verify-machineinstrs < %s | FileCheck %s
 
 ; CHECK-LABEL: cmpxchg_monotonic_32:
@@ -15,6 +16,22 @@
 ; CHECK-NEXT:     and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
 ; CHECK-NEXT:     str [[STATUS32]], [x3]
 define i32 @cmpxchg_monotonic_32(ptr %p, i32 %cmp, i32 %new, ptr %ps) #0 {
+; CHECK-LABEL: cmpxchg_monotonic_32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:  .LBB0_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldaxr w0, [x8]
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    b.ne .LBB0_3
+; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    stlxr wzr, w2, [x8]
+; CHECK-NEXT:    cbnz wzr, .LBB0_1
+; CHECK-NEXT:  .LBB0_3:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    and w8, w8, #0x1
+; CHECK-NEXT:    str w8, [x3]
+; CHECK-NEXT:    ret
   %tmp0 = cmpxchg ptr %p, i32 %cmp, i32 %new monotonic monotonic
   %tmp1 = extractvalue { i32, i1 } %tmp0, 0
   %tmp2 = extractvalue { i32, i1 } %tmp0, 1
@@ -40,6 +57,23 @@ define i32 @cmpxchg_monotonic_32(ptr %p, i32 %cmp, i32 %new, ptr %ps) #0 {
 ; CHECK-NEXT:     and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
 ; CHECK-NEXT:     str [[STATUS32]], [x3]
 define i32 @cmpxchg_acq_rel_32_load(ptr %p, i32 %cmp, ptr %pnew, ptr %ps) #0 {
+; CHECK-LABEL: cmpxchg_acq_rel_32_load:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:    ldr w9, [x2]
+; CHECK-NEXT:  .LBB1_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldaxr w0, [x8]
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    b.ne .LBB1_3
+; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB1_1 Depth=1
+; CHECK-NEXT:    stlxr wzr, w9, [x8]
+; CHECK-NEXT:    cbnz wzr, .LBB1_1
+; CHECK-NEXT:  .LBB1_3:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    and w8, w8, #0x1
+; CHECK-NEXT:    str w8, [x3]
+; CHECK-NEXT:    ret
   %new = load i32, ptr %pnew
   %tmp0 = cmpxchg ptr %p, i32 %cmp, i32 %new acq_rel acquire
   %tmp1 = extractvalue { i32, i1 } %tmp0, 0
@@ -64,6 +98,22 @@ define i32 @cmpxchg_acq_rel_32_load(ptr %p, i32 %cmp, ptr %pnew, ptr %ps) #0 {
 ; CHECK-NEXT:     and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
 ; CHECK-NEXT:     str [[STATUS32]], [x3]
 define i64 @cmpxchg_seq_cst_64(ptr %p, i64 %cmp, i64 %new, ptr %ps) #0 {
+; CHECK-LABEL: cmpxchg_seq_cst_64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:  .LBB2_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldaxr x0, [x8]
+; CHECK-NEXT:    cmp x0, x1
+; CHECK-NEXT:    b.ne .LBB2_3
+; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB2_1 Depth=1
+; CHECK-NEXT:    stlxr wzr, x2, [x8]
+; CHECK-NEXT:    cbnz wzr, .LBB2_1
+; CHECK-NEXT:  .LBB2_3:
+; CHECK-NEXT:    cmp x0, x1
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    and w8, w8, #0x1
+; CHECK-NEXT:    str w8, [x3]
+; CHECK-NEXT:    ret
   %tmp0 = cmpxchg ptr %p, i64 %cmp, i64 %new seq_cst seq_cst
   %tmp1 = extractvalue { i64, i1 } %tmp0, 0
   %tmp2 = extractvalue { i64, i1 } %tmp0, 1
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-cmp-bcc.ll b/llvm/test/CodeGen/AArch64/misched-fusion-cmp-bcc.ll
index 700a060ef968f..f8c2787caa44f 100644
--- a/llvm/test/CodeGen/AArch64/misched-fusion-cmp-bcc.ll
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-cmp-bcc.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mattr=cmp-bcc-fusion | FileCheck %s
 ; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a77    | FileCheck %s
 ; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a78    | FileCheck %s
@@ -16,6 +17,16 @@
 
 
 define void @test_cmp_bcc_fusion(i32 %x, i32 %y, i32* %arr) {
+; CHECK-LABEL: test_cmp_bcc_fusion:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str w0, [x2]
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    b.ne .LBB0_2
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_1: // %if_true
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_2: // %if_false
+; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %x, %y
   store i32 %x, i32* %arr, align 4
diff --git a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
index aee705f0be9b9..98ad3cbdfa611 100644
--- a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
+++ b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
@@ -147,7 +147,7 @@ define target("aarch64.svcount") @test_sel_cc(target("aarch64.svcount") %x, targ
 ; CHECK-O0:       // %bb.0:
 ; CHECK-O0-NEXT:    mov p2.b, p1.b
 ; CHECK-O0-NEXT:    mov p1.b, p0.b
-; CHECK-O0-NEXT:    subs w8, w0, #42
+; CHECK-O0-NEXT:    cmp w0, #42
 ; CHECK-O0-NEXT:    cset w9, gt
 ; CHECK-O0-NEXT:    // implicit-def: $x8
 ; CHECK-O0-NEXT:    mov w8, w9



More information about the llvm-commits mailing list