[llvm] [ARM] Fix failure to register-allocate CMP_SWAP_64 pseudo-inst (PR #106721)

Oliver Stannard via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 30 05:45:09 PDT 2024


https://github.com/ostannard created https://github.com/llvm/llvm-project/pull/106721

This test case was failing to compile with a "ran out of registers during register allocation" error at -O0. This was because CMP_SWAP_64 has 3 operands which must each be an even-odd register pair, and two other GPR operands. All of the def operands are also early-clobber, so registers can't be shared between uses and defs. Because the function has an over-aligned alloca, it needs frame and base pointers, so r6 and r11 are both reserved. That leaves r0/r1, r2/r3, r4/r5 and r8/r9 as the only valid register pairs, and if the two individual GPR operands happen to get allocated to registers in different pairs then only 2 pairs will be available for the three GPRPair operands.

To fix this, I've merged the two GPR operands into a single GPRPair operand. This means that the instruction now has 4 GPRPair operands, which can always be allocated without relying on luck. This does constrain register allocation a bit more, but this pseudo instruction is only used at -O0, so I don't think that's a problem.

>From 3fb4ba12c9a5a3276caf85aea06517c14d280863 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 30 Aug 2024 13:15:37 +0100
Subject: [PATCH] [ARM] Fix failure to register-allocate CMP_SWAP_64
 pseudo-inst

This test case was failing to compile with a "ran out of registers
during register allocation" error at -O0. This was because CMP_SWAP_64
has 3 operands which must each be an even-odd register pair, and two
other GPR operands. All of the def operands are also early-clobber, so
registers can't be shared between uses and defs. Because the function
has an over-aligned alloca it needs frame and base pointers, so r6 and
r11 are both reserved. That leaves r0/r1, r2/r3, r4/r5 and r8/r9 as the
only valid register pairs, and if the two individual GPR operands happen
to get allocated to registers in different pairs then only 2 pairs will
be available for the three GPRPair operands.

To fix this, I've merged the two GPR operands into a single GPRPair
operand. This means that the instruction now has 4 GPRPair operands,
which can always be allocated without relying on luck. This does
constrain register allocation a bit more, but this pseudo instruction is
only used at -O0, so I don't think that's a problem.
---
 llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp  |    9 +-
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |   32 +-
 llvm/lib/Target/ARM/ARMInstrInfo.td           |   17 +-
 .../CodeGen/ARM/atomic-64bit-fast-regalloc.ll |   96 ++
 llvm/test/CodeGen/ARM/atomic-load-store.ll    |   96 +-
 .../ARM/atomicrmw_exclusive_monitor_ints.ll   | 1112 +++++++++--------
 llvm/test/CodeGen/ARM/cmpxchg-O0.ll           |    5 +-
 llvm/test/CodeGen/ARM/cmpxchg.mir             |   13 +-
 llvm/test/CodeGen/Thumb2/cmpxchg.mir          |   13 +-
 9 files changed, 829 insertions(+), 564 deletions(-)
 create mode 100644 llvm/test/CodeGen/ARM/atomic-64bit-fast-regalloc.ll

diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index df10613fcc7c93..25dfacca956bb8 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1942,11 +1942,14 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
   MachineInstr &MI = *MBBI;
   DebugLoc DL = MI.getDebugLoc();
   MachineOperand &Dest = MI.getOperand(0);
-  Register TempReg = MI.getOperand(1).getReg();
   // Duplicating undef operands into 2 instructions does not guarantee the same
   // value on both; However undef should be replaced by xzr anyway.
-  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
-  Register AddrReg = MI.getOperand(2).getReg();
+  assert(!MI.getOperand(1).isUndef() && "cannot handle undef");
+  Register AddrAndTempReg = MI.getOperand(1).getReg();
+  Register AddrReg = TRI->getSubReg(AddrAndTempReg, ARM::gsub_0);
+  Register TempReg = TRI->getSubReg(AddrAndTempReg, ARM::gsub_1);
+  assert(MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
+         "tied operands have different registers");
   Register DesiredReg = MI.getOperand(3).getReg();
   MachineOperand New = MI.getOperand(4);
   New.setIsKill(false);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index ec6367a803506b..e0e62950b42c19 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -10479,33 +10479,41 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
   Results.push_back(Cycles32.getValue(1));
 }
 
-static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
-  SDLoc dl(V.getNode());
-  auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
-  bool isBigEndian = DAG.getDataLayout().isBigEndian();
-  if (isBigEndian)
-    std::swap (VLo, VHi);
+static SDValue createGPRPairNode2xi32(SelectionDAG &DAG, SDValue V0, SDValue V1) {
+  SDLoc dl(V0.getNode());
   SDValue RegClass =
       DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
   SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
   SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
-  const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
+  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
   return SDValue(
       DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
 }
 
+static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V) {
+  SDLoc dl(V.getNode());
+  auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
+  bool isBigEndian = DAG.getDataLayout().isBigEndian();
+  if (isBigEndian)
+    std::swap (VLo, VHi);
+  return createGPRPairNode2xi32(DAG, VLo, VHi);
+}
+
 static void ReplaceCMP_SWAP_64Results(SDNode *N,
                                        SmallVectorImpl<SDValue> & Results,
                                        SelectionDAG &DAG) {
   assert(N->getValueType(0) == MVT::i64 &&
          "AtomicCmpSwap on types less than 64 should be legal");
-  SDValue Ops[] = {N->getOperand(1),
-                   createGPRPairNode(DAG, N->getOperand(2)),
-                   createGPRPairNode(DAG, N->getOperand(3)),
-                   N->getOperand(0)};
+  SDValue Ops[] = {
+      createGPRPairNode2xi32(DAG, N->getOperand(1),
+                             DAG.getUNDEF(MVT::i32)), // pointer, temp
+      createGPRPairNodei64(DAG, N->getOperand(2)),    // expected
+      createGPRPairNodei64(DAG, N->getOperand(3)),    // new
+      N->getOperand(0),                               // chain in
+  };
   SDNode *CmpSwap = DAG.getMachineNode(
       ARM::CMP_SWAP_64, SDLoc(N),
-      DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
+      DAG.getVTList(MVT::Untyped, MVT::Untyped, MVT::Other), Ops);
 
   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
   DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 26f7d70b43b262..0fc561382084e3 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -6509,8 +6509,21 @@ def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
                              (ins GPR:$addr, GPR:$desired, GPR:$new),
                              NoItinerary, []>, Sched<[]>;
 
-def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$temp),
-                             (ins GPR:$addr, GPRPair:$desired, GPRPair:$new),
+// The addr_temp and addr_temp_out operands are logically a pair of GPR
+// operands:
+// * addr is an input, holding the address to swap.
+// * temp is an earlyclobber output, used internally in the expansion of the
+//   pseudo-inst.
+// These are combined into one GPRPair operand to ensure that register
+// allocation always succeeds. In the worst case there are only 4 GPRPair
+// registers available, of which this instruction needs 3 for the other
+// operands. If these operands weren't combined they would also use two GPR
+// registers, which could overlap with two different GPRPairs, causing
+// allocation to fail. With them combined, we need to allocate 4 GPRPairs,
+// which will always succeed.
+let Constraints = "@earlyclobber $Rd,$addr_temp_out = $addr_temp" in
+def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPRPair:$addr_temp_out),
+                             (ins GPRPair:$addr_temp, GPRPair:$desired, GPRPair:$new),
                              NoItinerary, []>, Sched<[]>;
 }
 
diff --git a/llvm/test/CodeGen/ARM/atomic-64bit-fast-regalloc.ll b/llvm/test/CodeGen/ARM/atomic-64bit-fast-regalloc.ll
new file mode 100644
index 00000000000000..bcaea3d0258b70
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/atomic-64bit-fast-regalloc.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=armv7-none-eabi -O0 | FileCheck %s --check-prefix=CHECK --check-prefix=LE
+; RUN: llc < %s -mtriple=armv7eb-none-eabi -O0 | FileCheck %s --check-prefix=CHECK --check-prefix=BE
+
+;; Previously, this failed during register allocation because the CMP_SWAP_64
+;; pseudo-instruction has a lot of operands, many of which need to be even-odd
+;; register pairs, and the over-aligned alloca in this function causes both a
+;; frame pointer and a base pointer to be needed.
+
+define void @test(ptr %ptr) {
+; CHECK-LABEL: test:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .setfp r11, sp, #24
+; CHECK-NEXT:    add r11, sp, #24
+; CHECK-NEXT:    .pad #32
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    bfc sp, #0, #4
+; CHECK-NEXT:    mov r6, sp
+; CHECK-NEXT:    str r0, [r6, #28] @ 4-byte Spill
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_1: @ %block1
+; CHECK-NEXT:    ldr r0, [r6, #28] @ 4-byte Reload
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    sub r1, r1, #16
+; CHECK-NEXT:    bic r1, r1, #15
+; CHECK-NEXT:    mov sp, r1
+; CHECK-NEXT:    dmb ish
+; CHECK-NEXT:    ldr r1, [r0]
+; CHECK-NEXT:    ldr r0, [r0, #4]
+; CHECK-NEXT:    str r1, [r6, #20] @ 4-byte Spill
+; CHECK-NEXT:    str r0, [r6, #24] @ 4-byte Spill
+; CHECK-NEXT:    b .LBB0_2
+; CHECK-NEXT:  .LBB0_2: @ %atomicrmw.start
+; CHECK-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-NEXT:    @ Child Loop BB0_3 Depth 2
+; CHECK-NEXT:    ldr r2, [r6, #24] @ 4-byte Reload
+; CHECK-NEXT:    ldr r0, [r6, #20] @ 4-byte Reload
+; CHECK-NEXT:    ldr r8, [r6, #28] @ 4-byte Reload
+; LE-NEXT:       str r2, [r6, #16] @ 4-byte Spill
+; LE-NEXT:       str r0, [r6, #12] @ 4-byte Spill
+; BE-NEXT:       str r2, [r6, #12] @ 4-byte Spill
+; BE-NEXT:       str r0, [r6, #16] @ 4-byte Spill
+; CHECK-NEXT:    @ implicit-def: $r1
+; CHECK-NEXT:    @ implicit-def: $r3
+; CHECK-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-NEXT:    mov r9, r1
+; CHECK-NEXT:    @ kill: def $r0 killed $r0 def $r0_r1
+; CHECK-NEXT:    mov r1, r2
+; CHECK-NEXT:    mov r12, #0
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    mov r3, r12
+; CHECK-NEXT:  .LBB0_3: @ %atomicrmw.start
+; CHECK-NEXT:    @ Parent Loop BB0_2 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    ldrexd r4, r5, [r8]
+; CHECK-NEXT:    cmp r4, r0
+; CHECK-NEXT:    cmpeq r5, r1
+; CHECK-NEXT:    bne .LBB0_5
+; CHECK-NEXT:  @ %bb.4: @ %atomicrmw.start
+; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=2
+; CHECK-NEXT:    strexd r9, r2, r3, [r8]
+; CHECK-NEXT:    cmp r9, #0
+; CHECK-NEXT:    bne .LBB0_3
+; CHECK-NEXT:  .LBB0_5: @ %atomicrmw.start
+; CHECK-NEXT:    @ in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    ldr r2, [r6, #12] @ 4-byte Reload
+; LE-NEXT:       ldr r1, [r6, #16] @ 4-byte Reload
+; LE-NEXT:       mov r0, r5
+; LE-NEXT:       eor r3, r0, r1
+; LE-NEXT:       mov r1, r4
+; LE-NEXT:       eor r2, r1, r2
+; BE-NEXT:       ldr r0, [r6, #16] @ 4-byte Reload
+; BE-NEXT:       mov     r1, r4
+; BE-NEXT:       eor     r3, r1, r0
+; BE-NEXT:       mov r0, r5
+; BE-NEXT:       eor     r2, r0, r2
+; CHECK-NEXT:    orr r2, r2, r3
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    str r1, [r6, #20] @ 4-byte Spill
+; CHECK-NEXT:    str r0, [r6, #24] @ 4-byte Spill
+; CHECK-NEXT:    bne .LBB0_2
+; CHECK-NEXT:    b .LBB0_6
+; CHECK-NEXT:  .LBB0_6: @ %atomicrmw.end
+; CHECK-NEXT:    dmb ish
+; CHECK-NEXT:    sub sp, r11, #24
+; CHECK-NEXT:    pop {r4, r5, r6, r8, r9, r10, r11, pc}
+entry:
+  br label %block1
+
+block1:
+  %stuff = alloca i8, i64 16, align 16
+  store atomic i64 0, ptr %ptr seq_cst, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index c53fb2f330a792..14e49bf3c9376a 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -327,50 +327,56 @@ define void @test_old_store_64bit(ptr %p, i64 %v) {
 ; ARMOPTNONE-NEXT:    push {r4, r5, r7, lr}
 ; ARMOPTNONE-NEXT:    add r7, sp, #8
 ; ARMOPTNONE-NEXT:    push {r8, r10, r11}
-; ARMOPTNONE-NEXT:    sub sp, sp, #20
-; ARMOPTNONE-NEXT:    str r0, [sp] @ 4-byte Spill
-; ARMOPTNONE-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; ARMOPTNONE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; ARMOPTNONE-NEXT:    sub sp, sp, #24
+; ARMOPTNONE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; ARMOPTNONE-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; ARMOPTNONE-NEXT:    str r1, [sp, #12] @ 4-byte Spill
 ; ARMOPTNONE-NEXT:    dmb ish
 ; ARMOPTNONE-NEXT:    ldr r1, [r0]
 ; ARMOPTNONE-NEXT:    ldr r0, [r0, #4]
-; ARMOPTNONE-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; ARMOPTNONE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; ARMOPTNONE-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; ARMOPTNONE-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; ARMOPTNONE-NEXT:    b LBB5_1
 ; ARMOPTNONE-NEXT:  LBB5_1: @ %atomicrmw.start
 ; ARMOPTNONE-NEXT:    @ =>This Loop Header: Depth=1
 ; ARMOPTNONE-NEXT:    @ Child Loop BB5_2 Depth 2
-; ARMOPTNONE-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; ARMOPTNONE-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
-; ARMOPTNONE-NEXT:    ldr r3, [sp] @ 4-byte Reload
-; ARMOPTNONE-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; ARMOPTNONE-NEXT:    ldr r10, [sp, #8] @ 4-byte Reload
-; ARMOPTNONE-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
-; ARMOPTNONE-NEXT:    mov r11, r0
-; ARMOPTNONE-NEXT:    mov r8, r2
+; ARMOPTNONE-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
+; ARMOPTNONE-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; ARMOPTNONE-NEXT:    ldr r12, [sp, #8] @ 4-byte Reload
+; ARMOPTNONE-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; ARMOPTNONE-NEXT:    ldr r8, [sp, #4] @ 4-byte Reload
+; ARMOPTNONE-NEXT:    str r3, [sp] @ 4-byte Spill
+; ARMOPTNONE-NEXT:    @ implicit-def: $r1
+; ARMOPTNONE-NEXT:    @ implicit-def: $r9
+; ARMOPTNONE-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
 ; ARMOPTNONE-NEXT:    mov r9, r1
+; ARMOPTNONE-NEXT:    @ kill: def $r0 killed $r0 def $r0_r1
+; ARMOPTNONE-NEXT:    mov r1, r12
+; ARMOPTNONE-NEXT:    mov r10, r2
+; ARMOPTNONE-NEXT:    mov r11, r3
 ; ARMOPTNONE-NEXT:  LBB5_2: @ %atomicrmw.start
 ; ARMOPTNONE-NEXT:    @ Parent Loop BB5_1 Depth=1
 ; ARMOPTNONE-NEXT:    @ => This Inner Loop Header: Depth=2
-; ARMOPTNONE-NEXT:    ldrexd r4, r5, [r3]
-; ARMOPTNONE-NEXT:    cmp r4, r8
-; ARMOPTNONE-NEXT:    cmpeq r5, r9
+; ARMOPTNONE-NEXT:    ldrexd r4, r5, [r8]
+; ARMOPTNONE-NEXT:    cmp r4, r10
+; ARMOPTNONE-NEXT:    cmpeq r5, r11
 ; ARMOPTNONE-NEXT:    bne LBB5_4
 ; ARMOPTNONE-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; ARMOPTNONE-NEXT:    @ in Loop: Header=BB5_2 Depth=2
-; ARMOPTNONE-NEXT:    strexd r0, r10, r11, [r3]
-; ARMOPTNONE-NEXT:    cmp r0, #0
+; ARMOPTNONE-NEXT:    strexd r9, r0, r1, [r8]
+; ARMOPTNONE-NEXT:    cmp r9, #0
 ; ARMOPTNONE-NEXT:    bne LBB5_2
 ; ARMOPTNONE-NEXT:  LBB5_4: @ %atomicrmw.start
 ; ARMOPTNONE-NEXT:    @ in Loop: Header=BB5_1 Depth=1
+; ARMOPTNONE-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; ARMOPTNONE-NEXT:    mov r0, r5
 ; ARMOPTNONE-NEXT:    eor r3, r0, r1
 ; ARMOPTNONE-NEXT:    mov r1, r4
 ; ARMOPTNONE-NEXT:    eor r2, r1, r2
 ; ARMOPTNONE-NEXT:    orr r2, r2, r3
 ; ARMOPTNONE-NEXT:    cmp r2, #0
-; ARMOPTNONE-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; ARMOPTNONE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; ARMOPTNONE-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; ARMOPTNONE-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; ARMOPTNONE-NEXT:    bne LBB5_1
 ; ARMOPTNONE-NEXT:    b LBB5_5
 ; ARMOPTNONE-NEXT:  LBB5_5: @ %atomicrmw.end
@@ -861,52 +867,58 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
 ; ARMOPTNONE-NEXT:    push {r4, r5, r7, lr}
 ; ARMOPTNONE-NEXT:    add r7, sp, #8
 ; ARMOPTNONE-NEXT:    push {r8, r10, r11}
-; ARMOPTNONE-NEXT:    sub sp, sp, #20
-; ARMOPTNONE-NEXT:    str r0, [sp] @ 4-byte Spill
+; ARMOPTNONE-NEXT:    sub sp, sp, #24
+; ARMOPTNONE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
 ; ARMOPTNONE-NEXT:    vmov d16, r1, r2
 ; ARMOPTNONE-NEXT:    vmov r1, r2, d16
-; ARMOPTNONE-NEXT:    str r2, [sp, #4] @ 4-byte Spill
-; ARMOPTNONE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; ARMOPTNONE-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; ARMOPTNONE-NEXT:    str r1, [sp, #12] @ 4-byte Spill
 ; ARMOPTNONE-NEXT:    dmb ish
 ; ARMOPTNONE-NEXT:    ldr r1, [r0]
 ; ARMOPTNONE-NEXT:    ldr r0, [r0, #4]
-; ARMOPTNONE-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; ARMOPTNONE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; ARMOPTNONE-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; ARMOPTNONE-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; ARMOPTNONE-NEXT:    b LBB13_1
 ; ARMOPTNONE-NEXT:  LBB13_1: @ %atomicrmw.start
 ; ARMOPTNONE-NEXT:    @ =>This Loop Header: Depth=1
 ; ARMOPTNONE-NEXT:    @ Child Loop BB13_2 Depth 2
-; ARMOPTNONE-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; ARMOPTNONE-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
-; ARMOPTNONE-NEXT:    ldr r3, [sp] @ 4-byte Reload
-; ARMOPTNONE-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; ARMOPTNONE-NEXT:    ldr r10, [sp, #8] @ 4-byte Reload
-; ARMOPTNONE-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
-; ARMOPTNONE-NEXT:    mov r11, r0
-; ARMOPTNONE-NEXT:    mov r8, r2
+; ARMOPTNONE-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
+; ARMOPTNONE-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; ARMOPTNONE-NEXT:    ldr r12, [sp, #8] @ 4-byte Reload
+; ARMOPTNONE-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; ARMOPTNONE-NEXT:    ldr r8, [sp, #4] @ 4-byte Reload
+; ARMOPTNONE-NEXT:    str r3, [sp] @ 4-byte Spill
+; ARMOPTNONE-NEXT:    @ implicit-def: $r1
+; ARMOPTNONE-NEXT:    @ implicit-def: $r9
+; ARMOPTNONE-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
 ; ARMOPTNONE-NEXT:    mov r9, r1
+; ARMOPTNONE-NEXT:    @ kill: def $r0 killed $r0 def $r0_r1
+; ARMOPTNONE-NEXT:    mov r1, r12
+; ARMOPTNONE-NEXT:    mov r10, r2
+; ARMOPTNONE-NEXT:    mov r11, r3
 ; ARMOPTNONE-NEXT:  LBB13_2: @ %atomicrmw.start
 ; ARMOPTNONE-NEXT:    @ Parent Loop BB13_1 Depth=1
 ; ARMOPTNONE-NEXT:    @ => This Inner Loop Header: Depth=2
-; ARMOPTNONE-NEXT:    ldrexd r4, r5, [r3]
-; ARMOPTNONE-NEXT:    cmp r4, r8
-; ARMOPTNONE-NEXT:    cmpeq r5, r9
+; ARMOPTNONE-NEXT:    ldrexd r4, r5, [r8]
+; ARMOPTNONE-NEXT:    cmp r4, r10
+; ARMOPTNONE-NEXT:    cmpeq r5, r11
 ; ARMOPTNONE-NEXT:    bne LBB13_4
 ; ARMOPTNONE-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; ARMOPTNONE-NEXT:    @ in Loop: Header=BB13_2 Depth=2
-; ARMOPTNONE-NEXT:    strexd r0, r10, r11, [r3]
-; ARMOPTNONE-NEXT:    cmp r0, #0
+; ARMOPTNONE-NEXT:    strexd r9, r0, r1, [r8]
+; ARMOPTNONE-NEXT:    cmp r9, #0
 ; ARMOPTNONE-NEXT:    bne LBB13_2
 ; ARMOPTNONE-NEXT:  LBB13_4: @ %atomicrmw.start
 ; ARMOPTNONE-NEXT:    @ in Loop: Header=BB13_1 Depth=1
+; ARMOPTNONE-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; ARMOPTNONE-NEXT:    mov r0, r5
 ; ARMOPTNONE-NEXT:    eor r3, r0, r1
 ; ARMOPTNONE-NEXT:    mov r1, r4
 ; ARMOPTNONE-NEXT:    eor r2, r1, r2
 ; ARMOPTNONE-NEXT:    orr r2, r2, r3
 ; ARMOPTNONE-NEXT:    cmp r2, #0
-; ARMOPTNONE-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; ARMOPTNONE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; ARMOPTNONE-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; ARMOPTNONE-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; ARMOPTNONE-NEXT:    bne LBB13_1
 ; ARMOPTNONE-NEXT:    b LBB13_5
 ; ARMOPTNONE-NEXT:  LBB13_5: @ %atomicrmw.end
diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
index 161692137fc30b..a38ade7cdbf06b 100644
--- a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
+++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
@@ -6765,8 +6765,8 @@ entry:
 define i64 @test_xchg_i64() {
 ; CHECK-ARM8-LABEL: test_xchg_i64:
 ; CHECK-ARM8:       @ %bb.0: @ %entry
-; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM8-NEXT:    .pad #16
 ; CHECK-ARM8-NEXT:    sub sp, sp, #16
 ; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
@@ -6781,25 +6781,29 @@ define i64 @test_xchg_i64() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB33_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM8-NEXT:    mov r6, r2
-; CHECK-ARM8-NEXT:    mov r7, r1
-; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ implicit-def: $r0
+; CHECK-ARM8-NEXT:    @ implicit-def: $r3
+; CHECK-ARM8-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM8-NEXT:    mov r7, r0
+; CHECK-ARM8-NEXT:    mov r8, r2
+; CHECK-ARM8-NEXT:    mov r9, r1
 ; CHECK-ARM8-NEXT:    mov r0, #0
-; CHECK-ARM8-NEXT:    mov r8, #1
-; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM8-NEXT:    mov r9, r0
+; CHECK-ARM8-NEXT:    mov r10, #1
+; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM8-NEXT:    mov r11, r0
 ; CHECK-ARM8-NEXT:  .LBB33_2: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ Parent Loop BB33_1 Depth=1
 ; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM8-NEXT:    cmp r4, r6
-; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM8-NEXT:    cmp r4, r8
+; CHECK-ARM8-NEXT:    cmpeq r5, r9
 ; CHECK-ARM8-NEXT:    bne .LBB33_4
 ; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB33_2 Depth=2
-; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM8-NEXT:    cmp r7, #0
 ; CHECK-ARM8-NEXT:    bne .LBB33_2
 ; CHECK-ARM8-NEXT:  .LBB33_4: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB33_1 Depth=1
@@ -6819,12 +6823,12 @@ define i64 @test_xchg_i64() {
 ; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    add sp, sp, #16
-; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-ARM6-LABEL: test_xchg_i64:
 ; CHECK-ARM6:       @ %bb.0: @ %entry
-; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM6-NEXT:    .pad #16
 ; CHECK-ARM6-NEXT:    sub sp, sp, #16
 ; CHECK-ARM6-NEXT:    ldr r0, .LCPI33_0
@@ -6838,24 +6842,28 @@ define i64 @test_xchg_i64() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB33_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM6-NEXT:    mov r6, r2
-; CHECK-ARM6-NEXT:    mov r7, r1
-; CHECK-ARM6-NEXT:    ldr r3, .LCPI33_0
+; CHECK-ARM6-NEXT:    ldr r6, .LCPI33_0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r3
+; CHECK-ARM6-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM6-NEXT:    mov r7, r0
+; CHECK-ARM6-NEXT:    mov r8, r2
+; CHECK-ARM6-NEXT:    mov r9, r1
 ; CHECK-ARM6-NEXT:    mov r0, #0
-; CHECK-ARM6-NEXT:    mov r8, #1
-; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM6-NEXT:    mov r9, r0
+; CHECK-ARM6-NEXT:    mov r10, #1
+; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM6-NEXT:    mov r11, r0
 ; CHECK-ARM6-NEXT:  .LBB33_2: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ Parent Loop BB33_1 Depth=1
 ; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM6-NEXT:    cmp r4, r6
-; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM6-NEXT:    cmp r4, r8
+; CHECK-ARM6-NEXT:    cmpeq r5, r9
 ; CHECK-ARM6-NEXT:    bne .LBB33_4
 ; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB33_2 Depth=2
-; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM6-NEXT:    cmp r7, #0
 ; CHECK-ARM6-NEXT:    bne .LBB33_2
 ; CHECK-ARM6-NEXT:  .LBB33_4: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB33_1 Depth=1
@@ -6875,7 +6883,7 @@ define i64 @test_xchg_i64() {
 ; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    add sp, sp, #16
-; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-ARM6-NEXT:    .p2align 2
 ; CHECK-ARM6-NEXT:  @ %bb.6:
 ; CHECK-ARM6-NEXT:  .LCPI33_0:
@@ -6883,8 +6891,8 @@ define i64 @test_xchg_i64() {
 ;
 ; CHECK-THUMB7-LABEL: test_xchg_i64:
 ; CHECK-THUMB7:       @ %bb.0: @ %entry
-; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-THUMB7-NEXT:    .pad #16
 ; CHECK-THUMB7-NEXT:    sub sp, #16
 ; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
@@ -6899,26 +6907,30 @@ define i64 @test_xchg_i64() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB33_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-THUMB7-NEXT:    mov r6, r2
-; CHECK-THUMB7-NEXT:    mov r7, r1
-; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r0
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r3
+; CHECK-THUMB7-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-THUMB7-NEXT:    mov r7, r0
+; CHECK-THUMB7-NEXT:    mov r8, r2
+; CHECK-THUMB7-NEXT:    mov r9, r1
 ; CHECK-THUMB7-NEXT:    movs r0, #0
-; CHECK-THUMB7-NEXT:    mov.w r8, #1
-; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-THUMB7-NEXT:    mov r9, r0
+; CHECK-THUMB7-NEXT:    mov.w r10, #1
+; CHECK-THUMB7-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-THUMB7-NEXT:    mov r11, r0
 ; CHECK-THUMB7-NEXT:  .LBB33_2: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ Parent Loop BB33_1 Depth=1
 ; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-THUMB7-NEXT:    cmp r4, r8
 ; CHECK-THUMB7-NEXT:    it eq
-; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    cmpeq r5, r9
 ; CHECK-THUMB7-NEXT:    bne .LBB33_4
 ; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB33_2 Depth=2
-; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-THUMB7-NEXT:    cmp r7, #0
 ; CHECK-THUMB7-NEXT:    bne .LBB33_2
 ; CHECK-THUMB7-NEXT:  .LBB33_4: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB33_1 Depth=1
@@ -6938,7 +6950,7 @@ define i64 @test_xchg_i64() {
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    add sp, #16
-; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-THUMB6-LABEL: test_xchg_i64:
 ; CHECK-THUMB6:       @ %bb.0: @ %entry
@@ -6975,8 +6987,8 @@ entry:
 define i64 @test_add_i64() {
 ; CHECK-ARM8-LABEL: test_add_i64:
 ; CHECK-ARM8:       @ %bb.0: @ %entry
-; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM8-NEXT:    .pad #16
 ; CHECK-ARM8-NEXT:    sub sp, sp, #16
 ; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
@@ -6991,25 +7003,29 @@ define i64 @test_add_i64() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB34_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM8-NEXT:    mov r6, r2
-; CHECK-ARM8-NEXT:    mov r7, r1
-; CHECK-ARM8-NEXT:    adds r8, r2, #1
+; CHECK-ARM8-NEXT:    mov r8, r2
+; CHECK-ARM8-NEXT:    mov r9, r1
+; CHECK-ARM8-NEXT:    adds r10, r2, #1
 ; CHECK-ARM8-NEXT:    adc r0, r1, #0
-; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM8-NEXT:    mov r9, r0
-; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM8-NEXT:    mov r11, r0
+; CHECK-ARM8-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ implicit-def: $r0
+; CHECK-ARM8-NEXT:    @ implicit-def: $r3
+; CHECK-ARM8-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM8-NEXT:    mov r7, r0
 ; CHECK-ARM8-NEXT:  .LBB34_2: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ Parent Loop BB34_1 Depth=1
 ; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM8-NEXT:    cmp r4, r6
-; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM8-NEXT:    cmp r4, r8
+; CHECK-ARM8-NEXT:    cmpeq r5, r9
 ; CHECK-ARM8-NEXT:    bne .LBB34_4
 ; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB34_2 Depth=2
-; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM8-NEXT:    cmp r7, #0
 ; CHECK-ARM8-NEXT:    bne .LBB34_2
 ; CHECK-ARM8-NEXT:  .LBB34_4: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB34_1 Depth=1
@@ -7029,12 +7045,12 @@ define i64 @test_add_i64() {
 ; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    add sp, sp, #16
-; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-ARM6-LABEL: test_add_i64:
 ; CHECK-ARM6:       @ %bb.0: @ %entry
-; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM6-NEXT:    .pad #16
 ; CHECK-ARM6-NEXT:    sub sp, sp, #16
 ; CHECK-ARM6-NEXT:    ldr r0, .LCPI34_0
@@ -7048,24 +7064,28 @@ define i64 @test_add_i64() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB34_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM6-NEXT:    mov r6, r2
-; CHECK-ARM6-NEXT:    mov r7, r1
-; CHECK-ARM6-NEXT:    adds r8, r2, #1
+; CHECK-ARM6-NEXT:    mov r8, r2
+; CHECK-ARM6-NEXT:    mov r9, r1
+; CHECK-ARM6-NEXT:    adds r10, r2, #1
 ; CHECK-ARM6-NEXT:    adc r0, r1, #0
-; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM6-NEXT:    mov r9, r0
-; CHECK-ARM6-NEXT:    ldr r3, .LCPI34_0
+; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM6-NEXT:    mov r11, r0
+; CHECK-ARM6-NEXT:    ldr r6, .LCPI34_0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r3
+; CHECK-ARM6-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM6-NEXT:    mov r7, r0
 ; CHECK-ARM6-NEXT:  .LBB34_2: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ Parent Loop BB34_1 Depth=1
 ; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM6-NEXT:    cmp r4, r6
-; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM6-NEXT:    cmp r4, r8
+; CHECK-ARM6-NEXT:    cmpeq r5, r9
 ; CHECK-ARM6-NEXT:    bne .LBB34_4
 ; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB34_2 Depth=2
-; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM6-NEXT:    cmp r7, #0
 ; CHECK-ARM6-NEXT:    bne .LBB34_2
 ; CHECK-ARM6-NEXT:  .LBB34_4: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB34_1 Depth=1
@@ -7085,7 +7105,7 @@ define i64 @test_add_i64() {
 ; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    add sp, sp, #16
-; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-ARM6-NEXT:    .p2align 2
 ; CHECK-ARM6-NEXT:  @ %bb.6:
 ; CHECK-ARM6-NEXT:  .LCPI34_0:
@@ -7093,8 +7113,8 @@ define i64 @test_add_i64() {
 ;
 ; CHECK-THUMB7-LABEL: test_add_i64:
 ; CHECK-THUMB7:       @ %bb.0: @ %entry
-; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-THUMB7-NEXT:    .pad #16
 ; CHECK-THUMB7-NEXT:    sub sp, #16
 ; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
@@ -7109,26 +7129,30 @@ define i64 @test_add_i64() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB34_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-THUMB7-NEXT:    mov r6, r2
-; CHECK-THUMB7-NEXT:    mov r7, r1
-; CHECK-THUMB7-NEXT:    adds.w r8, r2, #1
+; CHECK-THUMB7-NEXT:    mov r8, r2
+; CHECK-THUMB7-NEXT:    mov r9, r1
+; CHECK-THUMB7-NEXT:    adds.w r10, r2, #1
 ; CHECK-THUMB7-NEXT:    adc r0, r1, #0
-; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-THUMB7-NEXT:    mov r9, r0
-; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-THUMB7-NEXT:    mov r11, r0
+; CHECK-THUMB7-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r0
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r3
+; CHECK-THUMB7-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-THUMB7-NEXT:    mov r7, r0
 ; CHECK-THUMB7-NEXT:  .LBB34_2: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ Parent Loop BB34_1 Depth=1
 ; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-THUMB7-NEXT:    cmp r4, r8
 ; CHECK-THUMB7-NEXT:    it eq
-; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    cmpeq r5, r9
 ; CHECK-THUMB7-NEXT:    bne .LBB34_4
 ; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB34_2 Depth=2
-; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-THUMB7-NEXT:    cmp r7, #0
 ; CHECK-THUMB7-NEXT:    bne .LBB34_2
 ; CHECK-THUMB7-NEXT:  .LBB34_4: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB34_1 Depth=1
@@ -7148,7 +7172,7 @@ define i64 @test_add_i64() {
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    add sp, #16
-; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-THUMB6-LABEL: test_add_i64:
 ; CHECK-THUMB6:       @ %bb.0: @ %entry
@@ -7185,8 +7209,8 @@ entry:
 define i64 @test_sub_i64() {
 ; CHECK-ARM8-LABEL: test_sub_i64:
 ; CHECK-ARM8:       @ %bb.0: @ %entry
-; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM8-NEXT:    .pad #16
 ; CHECK-ARM8-NEXT:    sub sp, sp, #16
 ; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
@@ -7201,25 +7225,29 @@ define i64 @test_sub_i64() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB35_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM8-NEXT:    mov r6, r2
-; CHECK-ARM8-NEXT:    mov r7, r1
-; CHECK-ARM8-NEXT:    subs r8, r2, #1
+; CHECK-ARM8-NEXT:    mov r8, r2
+; CHECK-ARM8-NEXT:    mov r9, r1
+; CHECK-ARM8-NEXT:    subs r10, r2, #1
 ; CHECK-ARM8-NEXT:    sbc r0, r1, #0
-; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM8-NEXT:    mov r9, r0
-; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM8-NEXT:    mov r11, r0
+; CHECK-ARM8-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ implicit-def: $r0
+; CHECK-ARM8-NEXT:    @ implicit-def: $r3
+; CHECK-ARM8-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM8-NEXT:    mov r7, r0
 ; CHECK-ARM8-NEXT:  .LBB35_2: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ Parent Loop BB35_1 Depth=1
 ; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM8-NEXT:    cmp r4, r6
-; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM8-NEXT:    cmp r4, r8
+; CHECK-ARM8-NEXT:    cmpeq r5, r9
 ; CHECK-ARM8-NEXT:    bne .LBB35_4
 ; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB35_2 Depth=2
-; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM8-NEXT:    cmp r7, #0
 ; CHECK-ARM8-NEXT:    bne .LBB35_2
 ; CHECK-ARM8-NEXT:  .LBB35_4: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB35_1 Depth=1
@@ -7239,12 +7267,12 @@ define i64 @test_sub_i64() {
 ; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    add sp, sp, #16
-; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-ARM6-LABEL: test_sub_i64:
 ; CHECK-ARM6:       @ %bb.0: @ %entry
-; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM6-NEXT:    .pad #16
 ; CHECK-ARM6-NEXT:    sub sp, sp, #16
 ; CHECK-ARM6-NEXT:    ldr r0, .LCPI35_0
@@ -7258,24 +7286,28 @@ define i64 @test_sub_i64() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB35_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM6-NEXT:    mov r6, r2
-; CHECK-ARM6-NEXT:    mov r7, r1
-; CHECK-ARM6-NEXT:    subs r8, r2, #1
+; CHECK-ARM6-NEXT:    mov r8, r2
+; CHECK-ARM6-NEXT:    mov r9, r1
+; CHECK-ARM6-NEXT:    subs r10, r2, #1
 ; CHECK-ARM6-NEXT:    sbc r0, r1, #0
-; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM6-NEXT:    mov r9, r0
-; CHECK-ARM6-NEXT:    ldr r3, .LCPI35_0
+; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM6-NEXT:    mov r11, r0
+; CHECK-ARM6-NEXT:    ldr r6, .LCPI35_0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r3
+; CHECK-ARM6-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM6-NEXT:    mov r7, r0
 ; CHECK-ARM6-NEXT:  .LBB35_2: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ Parent Loop BB35_1 Depth=1
 ; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM6-NEXT:    cmp r4, r6
-; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM6-NEXT:    cmp r4, r8
+; CHECK-ARM6-NEXT:    cmpeq r5, r9
 ; CHECK-ARM6-NEXT:    bne .LBB35_4
 ; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB35_2 Depth=2
-; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM6-NEXT:    cmp r7, #0
 ; CHECK-ARM6-NEXT:    bne .LBB35_2
 ; CHECK-ARM6-NEXT:  .LBB35_4: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB35_1 Depth=1
@@ -7295,7 +7327,7 @@ define i64 @test_sub_i64() {
 ; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    add sp, sp, #16
-; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-ARM6-NEXT:    .p2align 2
 ; CHECK-ARM6-NEXT:  @ %bb.6:
 ; CHECK-ARM6-NEXT:  .LCPI35_0:
@@ -7303,8 +7335,8 @@ define i64 @test_sub_i64() {
 ;
 ; CHECK-THUMB7-LABEL: test_sub_i64:
 ; CHECK-THUMB7:       @ %bb.0: @ %entry
-; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-THUMB7-NEXT:    .pad #16
 ; CHECK-THUMB7-NEXT:    sub sp, #16
 ; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
@@ -7319,26 +7351,30 @@ define i64 @test_sub_i64() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB35_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-THUMB7-NEXT:    mov r6, r2
-; CHECK-THUMB7-NEXT:    mov r7, r1
-; CHECK-THUMB7-NEXT:    subs.w r8, r2, #1
+; CHECK-THUMB7-NEXT:    mov r8, r2
+; CHECK-THUMB7-NEXT:    mov r9, r1
+; CHECK-THUMB7-NEXT:    subs.w r10, r2, #1
 ; CHECK-THUMB7-NEXT:    sbc r0, r1, #0
-; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-THUMB7-NEXT:    mov r9, r0
-; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-THUMB7-NEXT:    mov r11, r0
+; CHECK-THUMB7-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r0
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r3
+; CHECK-THUMB7-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-THUMB7-NEXT:    mov r7, r0
 ; CHECK-THUMB7-NEXT:  .LBB35_2: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ Parent Loop BB35_1 Depth=1
 ; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-THUMB7-NEXT:    cmp r4, r8
 ; CHECK-THUMB7-NEXT:    it eq
-; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    cmpeq r5, r9
 ; CHECK-THUMB7-NEXT:    bne .LBB35_4
 ; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB35_2 Depth=2
-; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-THUMB7-NEXT:    cmp r7, #0
 ; CHECK-THUMB7-NEXT:    bne .LBB35_2
 ; CHECK-THUMB7-NEXT:  .LBB35_4: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB35_1 Depth=1
@@ -7358,7 +7394,7 @@ define i64 @test_sub_i64() {
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    add sp, #16
-; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-THUMB6-LABEL: test_sub_i64:
 ; CHECK-THUMB6:       @ %bb.0: @ %entry
@@ -7395,8 +7431,8 @@ entry:
 define i64 @test_and_i64() {
 ; CHECK-ARM8-LABEL: test_and_i64:
 ; CHECK-ARM8:       @ %bb.0: @ %entry
-; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM8-NEXT:    .pad #16
 ; CHECK-ARM8-NEXT:    sub sp, sp, #16
 ; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
@@ -7411,25 +7447,29 @@ define i64 @test_and_i64() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB36_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM8-NEXT:    and r8, r2, #1
+; CHECK-ARM8-NEXT:    and r10, r2, #1
 ; CHECK-ARM8-NEXT:    mov r0, #0
-; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM8-NEXT:    mov r9, r0
-; CHECK-ARM8-NEXT:    mov r6, r2
-; CHECK-ARM8-NEXT:    mov r7, r1
-; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM8-NEXT:    mov r11, r0
+; CHECK-ARM8-NEXT:    mov r8, r2
+; CHECK-ARM8-NEXT:    mov r9, r1
+; CHECK-ARM8-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ implicit-def: $r0
+; CHECK-ARM8-NEXT:    @ implicit-def: $r3
+; CHECK-ARM8-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM8-NEXT:    mov r7, r0
 ; CHECK-ARM8-NEXT:  .LBB36_2: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ Parent Loop BB36_1 Depth=1
 ; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM8-NEXT:    cmp r4, r6
-; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM8-NEXT:    cmp r4, r8
+; CHECK-ARM8-NEXT:    cmpeq r5, r9
 ; CHECK-ARM8-NEXT:    bne .LBB36_4
 ; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB36_2 Depth=2
-; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM8-NEXT:    cmp r7, #0
 ; CHECK-ARM8-NEXT:    bne .LBB36_2
 ; CHECK-ARM8-NEXT:  .LBB36_4: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB36_1 Depth=1
@@ -7449,12 +7489,12 @@ define i64 @test_and_i64() {
 ; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    add sp, sp, #16
-; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-ARM6-LABEL: test_and_i64:
 ; CHECK-ARM6:       @ %bb.0: @ %entry
-; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM6-NEXT:    .pad #16
 ; CHECK-ARM6-NEXT:    sub sp, sp, #16
 ; CHECK-ARM6-NEXT:    ldr r0, .LCPI36_0
@@ -7468,24 +7508,28 @@ define i64 @test_and_i64() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB36_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM6-NEXT:    and r8, r2, #1
+; CHECK-ARM6-NEXT:    and r10, r2, #1
 ; CHECK-ARM6-NEXT:    mov r0, #0
-; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM6-NEXT:    mov r9, r0
-; CHECK-ARM6-NEXT:    mov r6, r2
-; CHECK-ARM6-NEXT:    mov r7, r1
-; CHECK-ARM6-NEXT:    ldr r3, .LCPI36_0
+; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM6-NEXT:    mov r11, r0
+; CHECK-ARM6-NEXT:    mov r8, r2
+; CHECK-ARM6-NEXT:    mov r9, r1
+; CHECK-ARM6-NEXT:    ldr r6, .LCPI36_0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r3
+; CHECK-ARM6-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM6-NEXT:    mov r7, r0
 ; CHECK-ARM6-NEXT:  .LBB36_2: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ Parent Loop BB36_1 Depth=1
 ; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM6-NEXT:    cmp r4, r6
-; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM6-NEXT:    cmp r4, r8
+; CHECK-ARM6-NEXT:    cmpeq r5, r9
 ; CHECK-ARM6-NEXT:    bne .LBB36_4
 ; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB36_2 Depth=2
-; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM6-NEXT:    cmp r7, #0
 ; CHECK-ARM6-NEXT:    bne .LBB36_2
 ; CHECK-ARM6-NEXT:  .LBB36_4: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB36_1 Depth=1
@@ -7505,7 +7549,7 @@ define i64 @test_and_i64() {
 ; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    add sp, sp, #16
-; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-ARM6-NEXT:    .p2align 2
 ; CHECK-ARM6-NEXT:  @ %bb.6:
 ; CHECK-ARM6-NEXT:  .LCPI36_0:
@@ -7513,8 +7557,8 @@ define i64 @test_and_i64() {
 ;
 ; CHECK-THUMB7-LABEL: test_and_i64:
 ; CHECK-THUMB7:       @ %bb.0: @ %entry
-; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-THUMB7-NEXT:    .pad #16
 ; CHECK-THUMB7-NEXT:    sub sp, #16
 ; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
@@ -7529,26 +7573,30 @@ define i64 @test_and_i64() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB36_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-THUMB7-NEXT:    and r8, r2, #1
+; CHECK-THUMB7-NEXT:    and r10, r2, #1
 ; CHECK-THUMB7-NEXT:    movs r0, #0
-; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-THUMB7-NEXT:    mov r9, r0
-; CHECK-THUMB7-NEXT:    mov r6, r2
-; CHECK-THUMB7-NEXT:    mov r7, r1
-; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-THUMB7-NEXT:    mov r11, r0
+; CHECK-THUMB7-NEXT:    mov r8, r2
+; CHECK-THUMB7-NEXT:    mov r9, r1
+; CHECK-THUMB7-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r0
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r3
+; CHECK-THUMB7-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-THUMB7-NEXT:    mov r7, r0
 ; CHECK-THUMB7-NEXT:  .LBB36_2: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ Parent Loop BB36_1 Depth=1
 ; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-THUMB7-NEXT:    cmp r4, r8
 ; CHECK-THUMB7-NEXT:    it eq
-; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    cmpeq r5, r9
 ; CHECK-THUMB7-NEXT:    bne .LBB36_4
 ; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB36_2 Depth=2
-; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-THUMB7-NEXT:    cmp r7, #0
 ; CHECK-THUMB7-NEXT:    bne .LBB36_2
 ; CHECK-THUMB7-NEXT:  .LBB36_4: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB36_1 Depth=1
@@ -7568,7 +7616,7 @@ define i64 @test_and_i64() {
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    add sp, #16
-; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-THUMB6-LABEL: test_and_i64:
 ; CHECK-THUMB6:       @ %bb.0: @ %entry
@@ -7605,8 +7653,8 @@ entry:
 define i64 @test_nand_i64() {
 ; CHECK-ARM8-LABEL: test_nand_i64:
 ; CHECK-ARM8:       @ %bb.0: @ %entry
-; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM8-NEXT:    .pad #16
 ; CHECK-ARM8-NEXT:    sub sp, sp, #16
 ; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
@@ -7621,27 +7669,31 @@ define i64 @test_nand_i64() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB37_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM8-NEXT:    mov r6, r2
-; CHECK-ARM8-NEXT:    mov r7, r1
+; CHECK-ARM8-NEXT:    mov r8, r2
+; CHECK-ARM8-NEXT:    mov r9, r1
 ; CHECK-ARM8-NEXT:    mvn r0, r2
 ; CHECK-ARM8-NEXT:    mvn r3, #1
-; CHECK-ARM8-NEXT:    orr r8, r0, r3
+; CHECK-ARM8-NEXT:    orr r10, r0, r3
 ; CHECK-ARM8-NEXT:    mvn r0, #0
-; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM8-NEXT:    mov r9, r0
-; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM8-NEXT:    mov r11, r0
+; CHECK-ARM8-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ implicit-def: $r0
+; CHECK-ARM8-NEXT:    @ implicit-def: $r3
+; CHECK-ARM8-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM8-NEXT:    mov r7, r0
 ; CHECK-ARM8-NEXT:  .LBB37_2: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ Parent Loop BB37_1 Depth=1
 ; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM8-NEXT:    cmp r4, r6
-; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM8-NEXT:    cmp r4, r8
+; CHECK-ARM8-NEXT:    cmpeq r5, r9
 ; CHECK-ARM8-NEXT:    bne .LBB37_4
 ; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB37_2 Depth=2
-; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM8-NEXT:    cmp r7, #0
 ; CHECK-ARM8-NEXT:    bne .LBB37_2
 ; CHECK-ARM8-NEXT:  .LBB37_4: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB37_1 Depth=1
@@ -7661,12 +7713,12 @@ define i64 @test_nand_i64() {
 ; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    add sp, sp, #16
-; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-ARM6-LABEL: test_nand_i64:
 ; CHECK-ARM6:       @ %bb.0: @ %entry
-; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM6-NEXT:    .pad #16
 ; CHECK-ARM6-NEXT:    sub sp, sp, #16
 ; CHECK-ARM6-NEXT:    ldr r0, .LCPI37_0
@@ -7680,26 +7732,30 @@ define i64 @test_nand_i64() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB37_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM6-NEXT:    mov r6, r2
-; CHECK-ARM6-NEXT:    mov r7, r1
+; CHECK-ARM6-NEXT:    mov r8, r2
+; CHECK-ARM6-NEXT:    mov r9, r1
 ; CHECK-ARM6-NEXT:    mvn r0, r2
 ; CHECK-ARM6-NEXT:    mvn r3, #1
-; CHECK-ARM6-NEXT:    orr r8, r0, r3
+; CHECK-ARM6-NEXT:    orr r10, r0, r3
 ; CHECK-ARM6-NEXT:    mvn r0, #0
-; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM6-NEXT:    mov r9, r0
-; CHECK-ARM6-NEXT:    ldr r3, .LCPI37_0
+; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM6-NEXT:    mov r11, r0
+; CHECK-ARM6-NEXT:    ldr r6, .LCPI37_0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r3
+; CHECK-ARM6-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM6-NEXT:    mov r7, r0
 ; CHECK-ARM6-NEXT:  .LBB37_2: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ Parent Loop BB37_1 Depth=1
 ; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM6-NEXT:    cmp r4, r6
-; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM6-NEXT:    cmp r4, r8
+; CHECK-ARM6-NEXT:    cmpeq r5, r9
 ; CHECK-ARM6-NEXT:    bne .LBB37_4
 ; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB37_2 Depth=2
-; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM6-NEXT:    cmp r7, #0
 ; CHECK-ARM6-NEXT:    bne .LBB37_2
 ; CHECK-ARM6-NEXT:  .LBB37_4: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB37_1 Depth=1
@@ -7719,7 +7775,7 @@ define i64 @test_nand_i64() {
 ; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    add sp, sp, #16
-; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-ARM6-NEXT:    .p2align 2
 ; CHECK-ARM6-NEXT:  @ %bb.6:
 ; CHECK-ARM6-NEXT:  .LCPI37_0:
@@ -7727,8 +7783,8 @@ define i64 @test_nand_i64() {
 ;
 ; CHECK-THUMB7-LABEL: test_nand_i64:
 ; CHECK-THUMB7:       @ %bb.0: @ %entry
-; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-THUMB7-NEXT:    .pad #16
 ; CHECK-THUMB7-NEXT:    sub sp, #16
 ; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
@@ -7743,27 +7799,31 @@ define i64 @test_nand_i64() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB37_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-THUMB7-NEXT:    mov r6, r2
-; CHECK-THUMB7-NEXT:    mov r7, r1
+; CHECK-THUMB7-NEXT:    mov r8, r2
+; CHECK-THUMB7-NEXT:    mov r9, r1
 ; CHECK-THUMB7-NEXT:    mvn r0, #1
-; CHECK-THUMB7-NEXT:    orn r8, r0, r2
+; CHECK-THUMB7-NEXT:    orn r10, r0, r2
 ; CHECK-THUMB7-NEXT:    mov.w r0, #-1
-; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-THUMB7-NEXT:    mov r9, r0
-; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-THUMB7-NEXT:    mov r11, r0
+; CHECK-THUMB7-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r0
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r3
+; CHECK-THUMB7-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-THUMB7-NEXT:    mov r7, r0
 ; CHECK-THUMB7-NEXT:  .LBB37_2: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ Parent Loop BB37_1 Depth=1
 ; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-THUMB7-NEXT:    cmp r4, r8
 ; CHECK-THUMB7-NEXT:    it eq
-; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    cmpeq r5, r9
 ; CHECK-THUMB7-NEXT:    bne .LBB37_4
 ; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB37_2 Depth=2
-; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-THUMB7-NEXT:    cmp r7, #0
 ; CHECK-THUMB7-NEXT:    bne .LBB37_2
 ; CHECK-THUMB7-NEXT:  .LBB37_4: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB37_1 Depth=1
@@ -7783,7 +7843,7 @@ define i64 @test_nand_i64() {
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    add sp, #16
-; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-THUMB6-LABEL: test_nand_i64:
 ; CHECK-THUMB6:       @ %bb.0: @ %entry
@@ -7820,8 +7880,8 @@ entry:
 define i64 @test_or_i64() {
 ; CHECK-ARM8-LABEL: test_or_i64:
 ; CHECK-ARM8:       @ %bb.0: @ %entry
-; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM8-NEXT:    .pad #16
 ; CHECK-ARM8-NEXT:    sub sp, sp, #16
 ; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
@@ -7836,24 +7896,28 @@ define i64 @test_or_i64() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB38_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM8-NEXT:    orr r8, r2, #1
-; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM8-NEXT:    orr r10, r2, #1
+; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM8-NEXT:    mov r11, r1
+; CHECK-ARM8-NEXT:    mov r8, r2
 ; CHECK-ARM8-NEXT:    mov r9, r1
-; CHECK-ARM8-NEXT:    mov r6, r2
-; CHECK-ARM8-NEXT:    mov r7, r1
-; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ implicit-def: $r0
+; CHECK-ARM8-NEXT:    @ implicit-def: $r3
+; CHECK-ARM8-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM8-NEXT:    mov r7, r0
 ; CHECK-ARM8-NEXT:  .LBB38_2: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ Parent Loop BB38_1 Depth=1
 ; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM8-NEXT:    cmp r4, r6
-; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM8-NEXT:    cmp r4, r8
+; CHECK-ARM8-NEXT:    cmpeq r5, r9
 ; CHECK-ARM8-NEXT:    bne .LBB38_4
 ; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB38_2 Depth=2
-; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM8-NEXT:    cmp r7, #0
 ; CHECK-ARM8-NEXT:    bne .LBB38_2
 ; CHECK-ARM8-NEXT:  .LBB38_4: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB38_1 Depth=1
@@ -7873,12 +7937,12 @@ define i64 @test_or_i64() {
 ; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    add sp, sp, #16
-; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-ARM6-LABEL: test_or_i64:
 ; CHECK-ARM6:       @ %bb.0: @ %entry
-; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM6-NEXT:    .pad #16
 ; CHECK-ARM6-NEXT:    sub sp, sp, #16
 ; CHECK-ARM6-NEXT:    ldr r0, .LCPI38_0
@@ -7892,23 +7956,27 @@ define i64 @test_or_i64() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB38_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM6-NEXT:    orr r8, r2, #1
-; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM6-NEXT:    orr r10, r2, #1
+; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM6-NEXT:    mov r11, r1
+; CHECK-ARM6-NEXT:    mov r8, r2
 ; CHECK-ARM6-NEXT:    mov r9, r1
-; CHECK-ARM6-NEXT:    mov r6, r2
-; CHECK-ARM6-NEXT:    mov r7, r1
-; CHECK-ARM6-NEXT:    ldr r3, .LCPI38_0
+; CHECK-ARM6-NEXT:    ldr r6, .LCPI38_0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r3
+; CHECK-ARM6-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM6-NEXT:    mov r7, r0
 ; CHECK-ARM6-NEXT:  .LBB38_2: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ Parent Loop BB38_1 Depth=1
 ; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM6-NEXT:    cmp r4, r6
-; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM6-NEXT:    cmp r4, r8
+; CHECK-ARM6-NEXT:    cmpeq r5, r9
 ; CHECK-ARM6-NEXT:    bne .LBB38_4
 ; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB38_2 Depth=2
-; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM6-NEXT:    cmp r7, #0
 ; CHECK-ARM6-NEXT:    bne .LBB38_2
 ; CHECK-ARM6-NEXT:  .LBB38_4: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB38_1 Depth=1
@@ -7928,7 +7996,7 @@ define i64 @test_or_i64() {
 ; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    add sp, sp, #16
-; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-ARM6-NEXT:    .p2align 2
 ; CHECK-ARM6-NEXT:  @ %bb.6:
 ; CHECK-ARM6-NEXT:  .LCPI38_0:
@@ -7936,8 +8004,8 @@ define i64 @test_or_i64() {
 ;
 ; CHECK-THUMB7-LABEL: test_or_i64:
 ; CHECK-THUMB7:       @ %bb.0: @ %entry
-; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-THUMB7-NEXT:    .pad #16
 ; CHECK-THUMB7-NEXT:    sub sp, #16
 ; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
@@ -7952,25 +8020,29 @@ define i64 @test_or_i64() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB38_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-THUMB7-NEXT:    orr r8, r2, #1
-; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-THUMB7-NEXT:    orr r10, r2, #1
+; CHECK-THUMB7-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-THUMB7-NEXT:    mov r11, r1
+; CHECK-THUMB7-NEXT:    mov r8, r2
 ; CHECK-THUMB7-NEXT:    mov r9, r1
-; CHECK-THUMB7-NEXT:    mov r6, r2
-; CHECK-THUMB7-NEXT:    mov r7, r1
-; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r0
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r3
+; CHECK-THUMB7-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-THUMB7-NEXT:    mov r7, r0
 ; CHECK-THUMB7-NEXT:  .LBB38_2: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ Parent Loop BB38_1 Depth=1
 ; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-THUMB7-NEXT:    cmp r4, r8
 ; CHECK-THUMB7-NEXT:    it eq
-; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    cmpeq r5, r9
 ; CHECK-THUMB7-NEXT:    bne .LBB38_4
 ; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB38_2 Depth=2
-; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-THUMB7-NEXT:    cmp r7, #0
 ; CHECK-THUMB7-NEXT:    bne .LBB38_2
 ; CHECK-THUMB7-NEXT:  .LBB38_4: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB38_1 Depth=1
@@ -7990,7 +8062,7 @@ define i64 @test_or_i64() {
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    add sp, #16
-; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-THUMB6-LABEL: test_or_i64:
 ; CHECK-THUMB6:       @ %bb.0: @ %entry
@@ -8027,8 +8099,8 @@ entry:
 define i64 @test_xor_i64() {
 ; CHECK-ARM8-LABEL: test_xor_i64:
 ; CHECK-ARM8:       @ %bb.0: @ %entry
-; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM8-NEXT:    .pad #16
 ; CHECK-ARM8-NEXT:    sub sp, sp, #16
 ; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
@@ -8043,24 +8115,28 @@ define i64 @test_xor_i64() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB39_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM8-NEXT:    eor r8, r2, #1
-; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM8-NEXT:    eor r10, r2, #1
+; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM8-NEXT:    mov r11, r1
+; CHECK-ARM8-NEXT:    mov r8, r2
 ; CHECK-ARM8-NEXT:    mov r9, r1
-; CHECK-ARM8-NEXT:    mov r6, r2
-; CHECK-ARM8-NEXT:    mov r7, r1
-; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ implicit-def: $r0
+; CHECK-ARM8-NEXT:    @ implicit-def: $r3
+; CHECK-ARM8-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM8-NEXT:    mov r7, r0
 ; CHECK-ARM8-NEXT:  .LBB39_2: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ Parent Loop BB39_1 Depth=1
 ; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM8-NEXT:    cmp r4, r6
-; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM8-NEXT:    cmp r4, r8
+; CHECK-ARM8-NEXT:    cmpeq r5, r9
 ; CHECK-ARM8-NEXT:    bne .LBB39_4
 ; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB39_2 Depth=2
-; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM8-NEXT:    cmp r7, #0
 ; CHECK-ARM8-NEXT:    bne .LBB39_2
 ; CHECK-ARM8-NEXT:  .LBB39_4: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB39_1 Depth=1
@@ -8080,12 +8156,12 @@ define i64 @test_xor_i64() {
 ; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    add sp, sp, #16
-; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-ARM6-LABEL: test_xor_i64:
 ; CHECK-ARM6:       @ %bb.0: @ %entry
-; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM6-NEXT:    .pad #16
 ; CHECK-ARM6-NEXT:    sub sp, sp, #16
 ; CHECK-ARM6-NEXT:    ldr r0, .LCPI39_0
@@ -8099,23 +8175,27 @@ define i64 @test_xor_i64() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB39_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM6-NEXT:    eor r8, r2, #1
-; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM6-NEXT:    eor r10, r2, #1
+; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM6-NEXT:    mov r11, r1
+; CHECK-ARM6-NEXT:    mov r8, r2
 ; CHECK-ARM6-NEXT:    mov r9, r1
-; CHECK-ARM6-NEXT:    mov r6, r2
-; CHECK-ARM6-NEXT:    mov r7, r1
-; CHECK-ARM6-NEXT:    ldr r3, .LCPI39_0
+; CHECK-ARM6-NEXT:    ldr r6, .LCPI39_0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r3
+; CHECK-ARM6-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM6-NEXT:    mov r7, r0
 ; CHECK-ARM6-NEXT:  .LBB39_2: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ Parent Loop BB39_1 Depth=1
 ; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM6-NEXT:    cmp r4, r6
-; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM6-NEXT:    cmp r4, r8
+; CHECK-ARM6-NEXT:    cmpeq r5, r9
 ; CHECK-ARM6-NEXT:    bne .LBB39_4
 ; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB39_2 Depth=2
-; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM6-NEXT:    cmp r7, #0
 ; CHECK-ARM6-NEXT:    bne .LBB39_2
 ; CHECK-ARM6-NEXT:  .LBB39_4: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB39_1 Depth=1
@@ -8135,7 +8215,7 @@ define i64 @test_xor_i64() {
 ; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    add sp, sp, #16
-; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-ARM6-NEXT:    .p2align 2
 ; CHECK-ARM6-NEXT:  @ %bb.6:
 ; CHECK-ARM6-NEXT:  .LCPI39_0:
@@ -8143,8 +8223,8 @@ define i64 @test_xor_i64() {
 ;
 ; CHECK-THUMB7-LABEL: test_xor_i64:
 ; CHECK-THUMB7:       @ %bb.0: @ %entry
-; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-THUMB7-NEXT:    .pad #16
 ; CHECK-THUMB7-NEXT:    sub sp, #16
 ; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
@@ -8159,25 +8239,29 @@ define i64 @test_xor_i64() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB39_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-THUMB7-NEXT:    eor r8, r2, #1
-; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-THUMB7-NEXT:    eor r10, r2, #1
+; CHECK-THUMB7-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-THUMB7-NEXT:    mov r11, r1
+; CHECK-THUMB7-NEXT:    mov r8, r2
 ; CHECK-THUMB7-NEXT:    mov r9, r1
-; CHECK-THUMB7-NEXT:    mov r6, r2
-; CHECK-THUMB7-NEXT:    mov r7, r1
-; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r0
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r3
+; CHECK-THUMB7-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-THUMB7-NEXT:    mov r7, r0
 ; CHECK-THUMB7-NEXT:  .LBB39_2: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ Parent Loop BB39_1 Depth=1
 ; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-THUMB7-NEXT:    cmp r4, r8
 ; CHECK-THUMB7-NEXT:    it eq
-; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    cmpeq r5, r9
 ; CHECK-THUMB7-NEXT:    bne .LBB39_4
 ; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB39_2 Depth=2
-; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-THUMB7-NEXT:    cmp r7, #0
 ; CHECK-THUMB7-NEXT:    bne .LBB39_2
 ; CHECK-THUMB7-NEXT:  .LBB39_4: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB39_1 Depth=1
@@ -8197,7 +8281,7 @@ define i64 @test_xor_i64() {
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    add sp, #16
-; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-THUMB6-LABEL: test_xor_i64:
 ; CHECK-THUMB6:       @ %bb.0: @ %entry
@@ -8235,8 +8319,8 @@ entry:
 define i64 @test_max_i64() {
 ; CHECK-ARM8-LABEL: test_max_i64:
 ; CHECK-ARM8:       @ %bb.0: @ %entry
-; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM8-NEXT:    .pad #16
 ; CHECK-ARM8-NEXT:    sub sp, sp, #16
 ; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
@@ -8251,32 +8335,36 @@ define i64 @test_max_i64() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB40_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM8-NEXT:    mov r6, r2
-; CHECK-ARM8-NEXT:    mov r7, r1
+; CHECK-ARM8-NEXT:    mov r8, r2
+; CHECK-ARM8-NEXT:    mov r9, r1
 ; CHECK-ARM8-NEXT:    rsbs r0, r2, #1
 ; CHECK-ARM8-NEXT:    rscs r0, r1, #0
 ; CHECK-ARM8-NEXT:    mov r0, #0
 ; CHECK-ARM8-NEXT:    movwlt r0, #1
-; CHECK-ARM8-NEXT:    mov r8, #1
+; CHECK-ARM8-NEXT:    mov r10, #1
 ; CHECK-ARM8-NEXT:    cmp r0, #0
-; CHECK-ARM8-NEXT:    movne r8, r2
+; CHECK-ARM8-NEXT:    movne r10, r2
 ; CHECK-ARM8-NEXT:    cmp r0, #0
 ; CHECK-ARM8-NEXT:    movne r0, r1
-; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM8-NEXT:    mov r9, r0
-; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM8-NEXT:    mov r11, r0
+; CHECK-ARM8-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ implicit-def: $r0
+; CHECK-ARM8-NEXT:    @ implicit-def: $r3
+; CHECK-ARM8-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM8-NEXT:    mov r7, r0
 ; CHECK-ARM8-NEXT:  .LBB40_2: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ Parent Loop BB40_1 Depth=1
 ; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM8-NEXT:    cmp r4, r6
-; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM8-NEXT:    cmp r4, r8
+; CHECK-ARM8-NEXT:    cmpeq r5, r9
 ; CHECK-ARM8-NEXT:    bne .LBB40_4
 ; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB40_2 Depth=2
-; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM8-NEXT:    cmp r7, #0
 ; CHECK-ARM8-NEXT:    bne .LBB40_2
 ; CHECK-ARM8-NEXT:  .LBB40_4: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB40_1 Depth=1
@@ -8296,12 +8384,12 @@ define i64 @test_max_i64() {
 ; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    add sp, sp, #16
-; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-ARM6-LABEL: test_max_i64:
 ; CHECK-ARM6:       @ %bb.0: @ %entry
-; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM6-NEXT:    .pad #16
 ; CHECK-ARM6-NEXT:    sub sp, sp, #16
 ; CHECK-ARM6-NEXT:    ldr r0, .LCPI40_0
@@ -8315,31 +8403,35 @@ define i64 @test_max_i64() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB40_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM6-NEXT:    mov r6, r2
-; CHECK-ARM6-NEXT:    mov r7, r1
+; CHECK-ARM6-NEXT:    mov r8, r2
+; CHECK-ARM6-NEXT:    mov r9, r1
 ; CHECK-ARM6-NEXT:    rsbs r0, r2, #1
 ; CHECK-ARM6-NEXT:    rscs r0, r1, #0
 ; CHECK-ARM6-NEXT:    mov r0, #0
 ; CHECK-ARM6-NEXT:    movlt r0, #1
-; CHECK-ARM6-NEXT:    mov r8, #1
+; CHECK-ARM6-NEXT:    mov r10, #1
 ; CHECK-ARM6-NEXT:    cmp r0, #0
-; CHECK-ARM6-NEXT:    movne r8, r2
+; CHECK-ARM6-NEXT:    movne r10, r2
 ; CHECK-ARM6-NEXT:    cmp r0, #0
 ; CHECK-ARM6-NEXT:    movne r0, r1
-; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM6-NEXT:    mov r9, r0
-; CHECK-ARM6-NEXT:    ldr r3, .LCPI40_0
+; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM6-NEXT:    mov r11, r0
+; CHECK-ARM6-NEXT:    ldr r6, .LCPI40_0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r3
+; CHECK-ARM6-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM6-NEXT:    mov r7, r0
 ; CHECK-ARM6-NEXT:  .LBB40_2: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ Parent Loop BB40_1 Depth=1
 ; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM6-NEXT:    cmp r4, r6
-; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM6-NEXT:    cmp r4, r8
+; CHECK-ARM6-NEXT:    cmpeq r5, r9
 ; CHECK-ARM6-NEXT:    bne .LBB40_4
 ; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB40_2 Depth=2
-; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM6-NEXT:    cmp r7, #0
 ; CHECK-ARM6-NEXT:    bne .LBB40_2
 ; CHECK-ARM6-NEXT:  .LBB40_4: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB40_1 Depth=1
@@ -8359,7 +8451,7 @@ define i64 @test_max_i64() {
 ; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    add sp, sp, #16
-; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-ARM6-NEXT:    .p2align 2
 ; CHECK-ARM6-NEXT:  @ %bb.6:
 ; CHECK-ARM6-NEXT:  .LCPI40_0:
@@ -8367,8 +8459,8 @@ define i64 @test_max_i64() {
 ;
 ; CHECK-THUMB7-LABEL: test_max_i64:
 ; CHECK-THUMB7:       @ %bb.0: @ %entry
-; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-THUMB7-NEXT:    .pad #16
 ; CHECK-THUMB7-NEXT:    sub sp, #16
 ; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
@@ -8388,31 +8480,35 @@ define i64 @test_max_i64() {
 ; CHECK-THUMB7-NEXT:    sbcs.w r3, r0, r1
 ; CHECK-THUMB7-NEXT:    it lt
 ; CHECK-THUMB7-NEXT:    movlt r0, #1
-; CHECK-THUMB7-NEXT:    mov r6, r2
-; CHECK-THUMB7-NEXT:    mov r7, r1
-; CHECK-THUMB7-NEXT:    mov.w r8, #1
+; CHECK-THUMB7-NEXT:    mov r8, r2
+; CHECK-THUMB7-NEXT:    mov r9, r1
+; CHECK-THUMB7-NEXT:    mov.w r10, #1
 ; CHECK-THUMB7-NEXT:    cmp r0, #0
 ; CHECK-THUMB7-NEXT:    it ne
-; CHECK-THUMB7-NEXT:    movne r8, r2
+; CHECK-THUMB7-NEXT:    movne r10, r2
 ; CHECK-THUMB7-NEXT:    cmp r0, #0
 ; CHECK-THUMB7-NEXT:    it ne
 ; CHECK-THUMB7-NEXT:    movne r0, r1
-; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-THUMB7-NEXT:    mov r9, r0
-; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-THUMB7-NEXT:    mov r11, r0
+; CHECK-THUMB7-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r0
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r3
+; CHECK-THUMB7-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-THUMB7-NEXT:    mov r7, r0
 ; CHECK-THUMB7-NEXT:  .LBB40_2: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ Parent Loop BB40_1 Depth=1
 ; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-THUMB7-NEXT:    cmp r4, r8
 ; CHECK-THUMB7-NEXT:    it eq
-; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    cmpeq r5, r9
 ; CHECK-THUMB7-NEXT:    bne .LBB40_4
 ; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB40_2 Depth=2
-; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-THUMB7-NEXT:    cmp r7, #0
 ; CHECK-THUMB7-NEXT:    bne .LBB40_2
 ; CHECK-THUMB7-NEXT:  .LBB40_4: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB40_1 Depth=1
@@ -8432,7 +8528,7 @@ define i64 @test_max_i64() {
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    add sp, #16
-; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-THUMB6-LABEL: test_max_i64:
 ; CHECK-THUMB6:       @ %bb.0: @ %entry
@@ -8539,8 +8635,8 @@ entry:
 define i64 @test_min_i64() {
 ; CHECK-ARM8-LABEL: test_min_i64:
 ; CHECK-ARM8:       @ %bb.0: @ %entry
-; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM8-NEXT:    .pad #16
 ; CHECK-ARM8-NEXT:    sub sp, sp, #16
 ; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
@@ -8555,32 +8651,36 @@ define i64 @test_min_i64() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB41_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM8-NEXT:    mov r6, r2
-; CHECK-ARM8-NEXT:    mov r7, r1
+; CHECK-ARM8-NEXT:    mov r8, r2
+; CHECK-ARM8-NEXT:    mov r9, r1
 ; CHECK-ARM8-NEXT:    subs r0, r2, #2
 ; CHECK-ARM8-NEXT:    sbcs r0, r1, #0
 ; CHECK-ARM8-NEXT:    mov r0, #0
 ; CHECK-ARM8-NEXT:    movwlt r0, #1
-; CHECK-ARM8-NEXT:    mov r8, #1
+; CHECK-ARM8-NEXT:    mov r10, #1
 ; CHECK-ARM8-NEXT:    cmp r0, #0
-; CHECK-ARM8-NEXT:    movne r8, r2
+; CHECK-ARM8-NEXT:    movne r10, r2
 ; CHECK-ARM8-NEXT:    cmp r0, #0
 ; CHECK-ARM8-NEXT:    movne r0, r1
-; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM8-NEXT:    mov r9, r0
-; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM8-NEXT:    mov r11, r0
+; CHECK-ARM8-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ implicit-def: $r0
+; CHECK-ARM8-NEXT:    @ implicit-def: $r3
+; CHECK-ARM8-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM8-NEXT:    mov r7, r0
 ; CHECK-ARM8-NEXT:  .LBB41_2: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ Parent Loop BB41_1 Depth=1
 ; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM8-NEXT:    cmp r4, r6
-; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM8-NEXT:    cmp r4, r8
+; CHECK-ARM8-NEXT:    cmpeq r5, r9
 ; CHECK-ARM8-NEXT:    bne .LBB41_4
 ; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB41_2 Depth=2
-; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM8-NEXT:    cmp r7, #0
 ; CHECK-ARM8-NEXT:    bne .LBB41_2
 ; CHECK-ARM8-NEXT:  .LBB41_4: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB41_1 Depth=1
@@ -8600,12 +8700,12 @@ define i64 @test_min_i64() {
 ; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    add sp, sp, #16
-; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-ARM6-LABEL: test_min_i64:
 ; CHECK-ARM6:       @ %bb.0: @ %entry
-; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM6-NEXT:    .pad #16
 ; CHECK-ARM6-NEXT:    sub sp, sp, #16
 ; CHECK-ARM6-NEXT:    ldr r0, .LCPI41_0
@@ -8619,31 +8719,35 @@ define i64 @test_min_i64() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB41_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM6-NEXT:    mov r6, r2
-; CHECK-ARM6-NEXT:    mov r7, r1
+; CHECK-ARM6-NEXT:    mov r8, r2
+; CHECK-ARM6-NEXT:    mov r9, r1
 ; CHECK-ARM6-NEXT:    subs r0, r2, #2
 ; CHECK-ARM6-NEXT:    sbcs r0, r1, #0
 ; CHECK-ARM6-NEXT:    mov r0, #0
 ; CHECK-ARM6-NEXT:    movlt r0, #1
-; CHECK-ARM6-NEXT:    mov r8, #1
+; CHECK-ARM6-NEXT:    mov r10, #1
 ; CHECK-ARM6-NEXT:    cmp r0, #0
-; CHECK-ARM6-NEXT:    movne r8, r2
+; CHECK-ARM6-NEXT:    movne r10, r2
 ; CHECK-ARM6-NEXT:    cmp r0, #0
 ; CHECK-ARM6-NEXT:    movne r0, r1
-; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM6-NEXT:    mov r9, r0
-; CHECK-ARM6-NEXT:    ldr r3, .LCPI41_0
+; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM6-NEXT:    mov r11, r0
+; CHECK-ARM6-NEXT:    ldr r6, .LCPI41_0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r3
+; CHECK-ARM6-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM6-NEXT:    mov r7, r0
 ; CHECK-ARM6-NEXT:  .LBB41_2: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ Parent Loop BB41_1 Depth=1
 ; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM6-NEXT:    cmp r4, r6
-; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM6-NEXT:    cmp r4, r8
+; CHECK-ARM6-NEXT:    cmpeq r5, r9
 ; CHECK-ARM6-NEXT:    bne .LBB41_4
 ; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB41_2 Depth=2
-; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM6-NEXT:    cmp r7, #0
 ; CHECK-ARM6-NEXT:    bne .LBB41_2
 ; CHECK-ARM6-NEXT:  .LBB41_4: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB41_1 Depth=1
@@ -8663,7 +8767,7 @@ define i64 @test_min_i64() {
 ; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    add sp, sp, #16
-; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-ARM6-NEXT:    .p2align 2
 ; CHECK-ARM6-NEXT:  @ %bb.6:
 ; CHECK-ARM6-NEXT:  .LCPI41_0:
@@ -8671,8 +8775,8 @@ define i64 @test_min_i64() {
 ;
 ; CHECK-THUMB7-LABEL: test_min_i64:
 ; CHECK-THUMB7:       @ %bb.0: @ %entry
-; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-THUMB7-NEXT:    .pad #16
 ; CHECK-THUMB7-NEXT:    sub sp, #16
 ; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
@@ -8687,36 +8791,40 @@ define i64 @test_min_i64() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB41_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-THUMB7-NEXT:    mov r6, r2
-; CHECK-THUMB7-NEXT:    mov r7, r1
+; CHECK-THUMB7-NEXT:    mov r8, r2
+; CHECK-THUMB7-NEXT:    mov r9, r1
 ; CHECK-THUMB7-NEXT:    subs r0, r2, #2
 ; CHECK-THUMB7-NEXT:    sbcs r0, r1, #0
 ; CHECK-THUMB7-NEXT:    mov.w r0, #0
 ; CHECK-THUMB7-NEXT:    it lt
 ; CHECK-THUMB7-NEXT:    movlt r0, #1
-; CHECK-THUMB7-NEXT:    mov.w r8, #1
+; CHECK-THUMB7-NEXT:    mov.w r10, #1
 ; CHECK-THUMB7-NEXT:    cmp r0, #0
 ; CHECK-THUMB7-NEXT:    it ne
-; CHECK-THUMB7-NEXT:    movne r8, r2
+; CHECK-THUMB7-NEXT:    movne r10, r2
 ; CHECK-THUMB7-NEXT:    cmp r0, #0
 ; CHECK-THUMB7-NEXT:    it ne
 ; CHECK-THUMB7-NEXT:    movne r0, r1
-; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-THUMB7-NEXT:    mov r9, r0
-; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-THUMB7-NEXT:    mov r11, r0
+; CHECK-THUMB7-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r0
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r3
+; CHECK-THUMB7-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-THUMB7-NEXT:    mov r7, r0
 ; CHECK-THUMB7-NEXT:  .LBB41_2: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ Parent Loop BB41_1 Depth=1
 ; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-THUMB7-NEXT:    cmp r4, r8
 ; CHECK-THUMB7-NEXT:    it eq
-; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    cmpeq r5, r9
 ; CHECK-THUMB7-NEXT:    bne .LBB41_4
 ; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB41_2 Depth=2
-; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-THUMB7-NEXT:    cmp r7, #0
 ; CHECK-THUMB7-NEXT:    bne .LBB41_2
 ; CHECK-THUMB7-NEXT:  .LBB41_4: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB41_1 Depth=1
@@ -8736,7 +8844,7 @@ define i64 @test_min_i64() {
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    add sp, #16
-; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-THUMB6-LABEL: test_min_i64:
 ; CHECK-THUMB6:       @ %bb.0: @ %entry
@@ -8843,8 +8951,8 @@ entry:
 define i64 @test_umax_i64() {
 ; CHECK-ARM8-LABEL: test_umax_i64:
 ; CHECK-ARM8:       @ %bb.0: @ %entry
-; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM8-NEXT:    .pad #16
 ; CHECK-ARM8-NEXT:    sub sp, sp, #16
 ; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
@@ -8859,32 +8967,36 @@ define i64 @test_umax_i64() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB42_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM8-NEXT:    mov r6, r2
-; CHECK-ARM8-NEXT:    mov r7, r1
+; CHECK-ARM8-NEXT:    mov r8, r2
+; CHECK-ARM8-NEXT:    mov r9, r1
 ; CHECK-ARM8-NEXT:    rsbs r0, r2, #1
 ; CHECK-ARM8-NEXT:    rscs r0, r1, #0
 ; CHECK-ARM8-NEXT:    mov r0, #0
 ; CHECK-ARM8-NEXT:    movwlo r0, #1
-; CHECK-ARM8-NEXT:    mov r8, #1
+; CHECK-ARM8-NEXT:    mov r10, #1
 ; CHECK-ARM8-NEXT:    cmp r0, #0
-; CHECK-ARM8-NEXT:    movne r8, r2
+; CHECK-ARM8-NEXT:    movne r10, r2
 ; CHECK-ARM8-NEXT:    cmp r0, #0
 ; CHECK-ARM8-NEXT:    movne r0, r1
-; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM8-NEXT:    mov r9, r0
-; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM8-NEXT:    mov r11, r0
+; CHECK-ARM8-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ implicit-def: $r0
+; CHECK-ARM8-NEXT:    @ implicit-def: $r3
+; CHECK-ARM8-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM8-NEXT:    mov r7, r0
 ; CHECK-ARM8-NEXT:  .LBB42_2: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ Parent Loop BB42_1 Depth=1
 ; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM8-NEXT:    cmp r4, r6
-; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM8-NEXT:    cmp r4, r8
+; CHECK-ARM8-NEXT:    cmpeq r5, r9
 ; CHECK-ARM8-NEXT:    bne .LBB42_4
 ; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB42_2 Depth=2
-; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM8-NEXT:    cmp r7, #0
 ; CHECK-ARM8-NEXT:    bne .LBB42_2
 ; CHECK-ARM8-NEXT:  .LBB42_4: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB42_1 Depth=1
@@ -8904,12 +9016,12 @@ define i64 @test_umax_i64() {
 ; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    add sp, sp, #16
-; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-ARM6-LABEL: test_umax_i64:
 ; CHECK-ARM6:       @ %bb.0: @ %entry
-; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM6-NEXT:    .pad #16
 ; CHECK-ARM6-NEXT:    sub sp, sp, #16
 ; CHECK-ARM6-NEXT:    ldr r0, .LCPI42_0
@@ -8923,31 +9035,35 @@ define i64 @test_umax_i64() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB42_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM6-NEXT:    mov r6, r2
-; CHECK-ARM6-NEXT:    mov r7, r1
+; CHECK-ARM6-NEXT:    mov r8, r2
+; CHECK-ARM6-NEXT:    mov r9, r1
 ; CHECK-ARM6-NEXT:    rsbs r0, r2, #1
 ; CHECK-ARM6-NEXT:    rscs r0, r1, #0
 ; CHECK-ARM6-NEXT:    mov r0, #0
 ; CHECK-ARM6-NEXT:    movlo r0, #1
-; CHECK-ARM6-NEXT:    mov r8, #1
+; CHECK-ARM6-NEXT:    mov r10, #1
 ; CHECK-ARM6-NEXT:    cmp r0, #0
-; CHECK-ARM6-NEXT:    movne r8, r2
+; CHECK-ARM6-NEXT:    movne r10, r2
 ; CHECK-ARM6-NEXT:    cmp r0, #0
 ; CHECK-ARM6-NEXT:    movne r0, r1
-; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM6-NEXT:    mov r9, r0
-; CHECK-ARM6-NEXT:    ldr r3, .LCPI42_0
+; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM6-NEXT:    mov r11, r0
+; CHECK-ARM6-NEXT:    ldr r6, .LCPI42_0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r3
+; CHECK-ARM6-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM6-NEXT:    mov r7, r0
 ; CHECK-ARM6-NEXT:  .LBB42_2: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ Parent Loop BB42_1 Depth=1
 ; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM6-NEXT:    cmp r4, r6
-; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM6-NEXT:    cmp r4, r8
+; CHECK-ARM6-NEXT:    cmpeq r5, r9
 ; CHECK-ARM6-NEXT:    bne .LBB42_4
 ; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB42_2 Depth=2
-; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM6-NEXT:    cmp r7, #0
 ; CHECK-ARM6-NEXT:    bne .LBB42_2
 ; CHECK-ARM6-NEXT:  .LBB42_4: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB42_1 Depth=1
@@ -8967,7 +9083,7 @@ define i64 @test_umax_i64() {
 ; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    add sp, sp, #16
-; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-ARM6-NEXT:    .p2align 2
 ; CHECK-ARM6-NEXT:  @ %bb.6:
 ; CHECK-ARM6-NEXT:  .LCPI42_0:
@@ -8975,8 +9091,8 @@ define i64 @test_umax_i64() {
 ;
 ; CHECK-THUMB7-LABEL: test_umax_i64:
 ; CHECK-THUMB7:       @ %bb.0: @ %entry
-; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-THUMB7-NEXT:    .pad #16
 ; CHECK-THUMB7-NEXT:    sub sp, #16
 ; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
@@ -8996,31 +9112,35 @@ define i64 @test_umax_i64() {
 ; CHECK-THUMB7-NEXT:    sbcs.w r3, r0, r1
 ; CHECK-THUMB7-NEXT:    it lo
 ; CHECK-THUMB7-NEXT:    movlo r0, #1
-; CHECK-THUMB7-NEXT:    mov r6, r2
-; CHECK-THUMB7-NEXT:    mov r7, r1
-; CHECK-THUMB7-NEXT:    mov.w r8, #1
+; CHECK-THUMB7-NEXT:    mov r8, r2
+; CHECK-THUMB7-NEXT:    mov r9, r1
+; CHECK-THUMB7-NEXT:    mov.w r10, #1
 ; CHECK-THUMB7-NEXT:    cmp r0, #0
 ; CHECK-THUMB7-NEXT:    it ne
-; CHECK-THUMB7-NEXT:    movne r8, r2
+; CHECK-THUMB7-NEXT:    movne r10, r2
 ; CHECK-THUMB7-NEXT:    cmp r0, #0
 ; CHECK-THUMB7-NEXT:    it ne
 ; CHECK-THUMB7-NEXT:    movne r0, r1
-; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-THUMB7-NEXT:    mov r9, r0
-; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-THUMB7-NEXT:    mov r11, r0
+; CHECK-THUMB7-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r0
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r3
+; CHECK-THUMB7-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-THUMB7-NEXT:    mov r7, r0
 ; CHECK-THUMB7-NEXT:  .LBB42_2: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ Parent Loop BB42_1 Depth=1
 ; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-THUMB7-NEXT:    cmp r4, r8
 ; CHECK-THUMB7-NEXT:    it eq
-; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    cmpeq r5, r9
 ; CHECK-THUMB7-NEXT:    bne .LBB42_4
 ; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB42_2 Depth=2
-; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-THUMB7-NEXT:    cmp r7, #0
 ; CHECK-THUMB7-NEXT:    bne .LBB42_2
 ; CHECK-THUMB7-NEXT:  .LBB42_4: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB42_1 Depth=1
@@ -9040,7 +9160,7 @@ define i64 @test_umax_i64() {
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    add sp, #16
-; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-THUMB6-LABEL: test_umax_i64:
 ; CHECK-THUMB6:       @ %bb.0: @ %entry
@@ -9147,8 +9267,8 @@ entry:
 define i64 @test_umin_i64() {
 ; CHECK-ARM8-LABEL: test_umin_i64:
 ; CHECK-ARM8:       @ %bb.0: @ %entry
-; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM8-NEXT:    .pad #16
 ; CHECK-ARM8-NEXT:    sub sp, sp, #16
 ; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
@@ -9163,32 +9283,36 @@ define i64 @test_umin_i64() {
 ; CHECK-ARM8-NEXT:    @ Child Loop BB43_2 Depth 2
 ; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM8-NEXT:    mov r6, r2
-; CHECK-ARM8-NEXT:    mov r7, r1
+; CHECK-ARM8-NEXT:    mov r8, r2
+; CHECK-ARM8-NEXT:    mov r9, r1
 ; CHECK-ARM8-NEXT:    subs r0, r2, #2
 ; CHECK-ARM8-NEXT:    sbcs r0, r1, #0
 ; CHECK-ARM8-NEXT:    mov r0, #0
 ; CHECK-ARM8-NEXT:    movwlo r0, #1
-; CHECK-ARM8-NEXT:    mov r8, #1
+; CHECK-ARM8-NEXT:    mov r10, #1
 ; CHECK-ARM8-NEXT:    cmp r0, #0
-; CHECK-ARM8-NEXT:    movne r8, r2
+; CHECK-ARM8-NEXT:    movne r10, r2
 ; CHECK-ARM8-NEXT:    cmp r0, #0
 ; CHECK-ARM8-NEXT:    movne r0, r1
-; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM8-NEXT:    mov r9, r0
-; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM8-NEXT:    mov r11, r0
+; CHECK-ARM8-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    @ implicit-def: $r0
+; CHECK-ARM8-NEXT:    @ implicit-def: $r3
+; CHECK-ARM8-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM8-NEXT:    mov r7, r0
 ; CHECK-ARM8-NEXT:  .LBB43_2: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ Parent Loop BB43_1 Depth=1
 ; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM8-NEXT:    cmp r4, r6
-; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM8-NEXT:    cmp r4, r8
+; CHECK-ARM8-NEXT:    cmpeq r5, r9
 ; CHECK-ARM8-NEXT:    bne .LBB43_4
 ; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB43_2 Depth=2
-; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM8-NEXT:    cmp r7, #0
 ; CHECK-ARM8-NEXT:    bne .LBB43_2
 ; CHECK-ARM8-NEXT:  .LBB43_4: @ %atomicrmw.start
 ; CHECK-ARM8-NEXT:    @ in Loop: Header=BB43_1 Depth=1
@@ -9208,12 +9332,12 @@ define i64 @test_umin_i64() {
 ; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM8-NEXT:    add sp, sp, #16
-; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-ARM6-LABEL: test_umin_i64:
 ; CHECK-ARM6:       @ %bb.0: @ %entry
-; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
-; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-ARM6-NEXT:    .pad #16
 ; CHECK-ARM6-NEXT:    sub sp, sp, #16
 ; CHECK-ARM6-NEXT:    ldr r0, .LCPI43_0
@@ -9227,31 +9351,35 @@ define i64 @test_umin_i64() {
 ; CHECK-ARM6-NEXT:    @ Child Loop BB43_2 Depth 2
 ; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-ARM6-NEXT:    mov r6, r2
-; CHECK-ARM6-NEXT:    mov r7, r1
+; CHECK-ARM6-NEXT:    mov r8, r2
+; CHECK-ARM6-NEXT:    mov r9, r1
 ; CHECK-ARM6-NEXT:    subs r0, r2, #2
 ; CHECK-ARM6-NEXT:    sbcs r0, r1, #0
 ; CHECK-ARM6-NEXT:    mov r0, #0
 ; CHECK-ARM6-NEXT:    movlo r0, #1
-; CHECK-ARM6-NEXT:    mov r8, #1
+; CHECK-ARM6-NEXT:    mov r10, #1
 ; CHECK-ARM6-NEXT:    cmp r0, #0
-; CHECK-ARM6-NEXT:    movne r8, r2
+; CHECK-ARM6-NEXT:    movne r10, r2
 ; CHECK-ARM6-NEXT:    cmp r0, #0
 ; CHECK-ARM6-NEXT:    movne r0, r1
-; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-ARM6-NEXT:    mov r9, r0
-; CHECK-ARM6-NEXT:    ldr r3, .LCPI43_0
+; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-ARM6-NEXT:    mov r11, r0
+; CHECK-ARM6-NEXT:    ldr r6, .LCPI43_0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r0
+; CHECK-ARM6-NEXT:    @ implicit-def: $r3
+; CHECK-ARM6-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-ARM6-NEXT:    mov r7, r0
 ; CHECK-ARM6-NEXT:  .LBB43_2: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ Parent Loop BB43_1 Depth=1
 ; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-ARM6-NEXT:    cmp r4, r6
-; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-ARM6-NEXT:    cmp r4, r8
+; CHECK-ARM6-NEXT:    cmpeq r5, r9
 ; CHECK-ARM6-NEXT:    bne .LBB43_4
 ; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB43_2 Depth=2
-; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-ARM6-NEXT:    cmp r7, #0
 ; CHECK-ARM6-NEXT:    bne .LBB43_2
 ; CHECK-ARM6-NEXT:  .LBB43_4: @ %atomicrmw.start
 ; CHECK-ARM6-NEXT:    @ in Loop: Header=BB43_1 Depth=1
@@ -9271,7 +9399,7 @@ define i64 @test_umin_i64() {
 ; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-ARM6-NEXT:    add sp, sp, #16
-; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-ARM6-NEXT:    .p2align 2
 ; CHECK-ARM6-NEXT:  @ %bb.6:
 ; CHECK-ARM6-NEXT:  .LCPI43_0:
@@ -9279,8 +9407,8 @@ define i64 @test_umin_i64() {
 ;
 ; CHECK-THUMB7-LABEL: test_umin_i64:
 ; CHECK-THUMB7:       @ %bb.0: @ %entry
-; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-THUMB7-NEXT:    .pad #16
 ; CHECK-THUMB7-NEXT:    sub sp, #16
 ; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
@@ -9295,36 +9423,40 @@ define i64 @test_umin_i64() {
 ; CHECK-THUMB7-NEXT:    @ Child Loop BB43_2 Depth 2
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-THUMB7-NEXT:    mov r6, r2
-; CHECK-THUMB7-NEXT:    mov r7, r1
+; CHECK-THUMB7-NEXT:    mov r8, r2
+; CHECK-THUMB7-NEXT:    mov r9, r1
 ; CHECK-THUMB7-NEXT:    subs r0, r2, #2
 ; CHECK-THUMB7-NEXT:    sbcs r0, r1, #0
 ; CHECK-THUMB7-NEXT:    mov.w r0, #0
 ; CHECK-THUMB7-NEXT:    it lo
 ; CHECK-THUMB7-NEXT:    movlo r0, #1
-; CHECK-THUMB7-NEXT:    mov.w r8, #1
+; CHECK-THUMB7-NEXT:    mov.w r10, #1
 ; CHECK-THUMB7-NEXT:    cmp r0, #0
 ; CHECK-THUMB7-NEXT:    it ne
-; CHECK-THUMB7-NEXT:    movne r8, r2
+; CHECK-THUMB7-NEXT:    movne r10, r2
 ; CHECK-THUMB7-NEXT:    cmp r0, #0
 ; CHECK-THUMB7-NEXT:    it ne
 ; CHECK-THUMB7-NEXT:    movne r0, r1
-; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
-; CHECK-THUMB7-NEXT:    mov r9, r0
-; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
-; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
+; CHECK-THUMB7-NEXT:    mov r11, r0
+; CHECK-THUMB7-NEXT:    movw r6, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r6, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r0
+; CHECK-THUMB7-NEXT:    @ implicit-def: $r3
+; CHECK-THUMB7-NEXT:    @ kill: def $r6 killed $r6 def $r6_r7
+; CHECK-THUMB7-NEXT:    mov r7, r0
 ; CHECK-THUMB7-NEXT:  .LBB43_2: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ Parent Loop BB43_1 Depth=1
 ; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
-; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r6]
+; CHECK-THUMB7-NEXT:    cmp r4, r8
 ; CHECK-THUMB7-NEXT:    it eq
-; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    cmpeq r5, r9
 ; CHECK-THUMB7-NEXT:    bne .LBB43_4
 ; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB43_2 Depth=2
-; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
-; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    strexd r7, r10, r11, [r6]
+; CHECK-THUMB7-NEXT:    cmp r7, #0
 ; CHECK-THUMB7-NEXT:    bne .LBB43_2
 ; CHECK-THUMB7-NEXT:  .LBB43_4: @ %atomicrmw.start
 ; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB43_1 Depth=1
@@ -9344,7 +9476,7 @@ define i64 @test_umin_i64() {
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    add sp, #16
-; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
 ; CHECK-THUMB6-LABEL: test_umin_i64:
 ; CHECK-THUMB6:       @ %bb.0: @ %entry
diff --git a/llvm/test/CodeGen/ARM/cmpxchg-O0.ll b/llvm/test/CodeGen/ARM/cmpxchg-O0.ll
index 28a64db1aeba4f..9158ae0c9fe159 100644
--- a/llvm/test/CodeGen/ARM/cmpxchg-O0.ll
+++ b/llvm/test/CodeGen/ARM/cmpxchg-O0.ll
@@ -78,15 +78,14 @@ define { i32, i1 } @test_cmpxchg_32(ptr %addr, i32 %desired, i32 %new) nounwind
 
 define { i64, i1 } @test_cmpxchg_64(ptr %addr, i64 %desired, i64 %new) nounwind {
 ; CHECK-LABEL: test_cmpxchg_64:
-; CHECK:     mov [[ADDR:r[0-9]+]], r0
 ; CHECK:     dmb ish
 ; CHECK-NOT: uxt
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [[[ADDR]]]
+; CHECK:     ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [r0]
 ; CHECK:     cmp [[OLDLO]], r6
 ; CHECK:     cmpeq [[OLDHI]], r7
 ; CHECK:     bne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     strexd [[STATUS:[lr0-9]+]], r8, r9, [r1]
+; CHECK:     strexd [[STATUS:[lr0-9]+]], r8, r9, [r0]
 ; CHECK:     cmp{{(\.w)?}} [[STATUS]], #0
 ; CHECK:     bne [[RETRY]]
 ; CHECK: [[DONE]]:
diff --git a/llvm/test/CodeGen/ARM/cmpxchg.mir b/llvm/test/CodeGen/ARM/cmpxchg.mir
index 20ab787fb4575b..2ef3281ca733e7 100644
--- a/llvm/test/CodeGen/ARM/cmpxchg.mir
+++ b/llvm/test/CodeGen/ARM/cmpxchg.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -o - %s -mtriple=armv7-unknown-linux-gnu -verify-machineinstrs -run-pass=arm-pseudo | FileCheck %s
+# RUN: llc -o - %s -mtriple=armv7eb-unknown-linux-gnu -verify-machineinstrs -run-pass=arm-pseudo | FileCheck %s
 ---
 name: func
 tracksRegLiveness: true
@@ -12,23 +13,23 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: .1:
     ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
-    ; CHECK-NEXT: liveins: $r4_r5, $r3
+    ; CHECK-NEXT: liveins: $r4_r5, $r2
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: $r0_r1 = LDREXD $r3, 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: $r0_r1 = LDREXD $r2, 14 /* CC::al */, $noreg
     ; CHECK-NEXT: CMPrr killed $r0, $r4, 14 /* CC::al */, $noreg, implicit-def $cpsr
     ; CHECK-NEXT: CMPrr killed $r1, $r5, 0 /* CC::eq */, killed $cpsr, implicit-def $cpsr
     ; CHECK-NEXT: Bcc %bb.3, 1 /* CC::ne */, killed $cpsr
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: .2:
     ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
-    ; CHECK-NEXT: liveins: $r4_r5, $r3
+    ; CHECK-NEXT: liveins: $r4_r5, $r2
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: early-clobber $r2 = STREXD $r4_r5, $r3, 14 /* CC::al */, $noreg
-    ; CHECK-NEXT: CMPri killed $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    ; CHECK-NEXT: early-clobber $r3 = STREXD $r4_r5, $r2, 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: CMPri killed $r3, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
     ; CHECK-NEXT: Bcc %bb.1, 1 /* CC::ne */, killed $cpsr
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: .3:
-    dead early-clobber renamable $r0_r1, dead early-clobber renamable $r2 = CMP_SWAP_64 killed renamable $r3, killed renamable $r4_r5, renamable $r4_r5 :: (volatile load store monotonic monotonic (s64))
+    dead early-clobber renamable $r0_r1, dead early-clobber renamable $r2_r3 = CMP_SWAP_64 killed renamable $r2_r3, killed renamable $r4_r5, renamable $r4_r5 :: (volatile load store monotonic monotonic (s64))
 ...
 ---
 name: func2
diff --git a/llvm/test/CodeGen/Thumb2/cmpxchg.mir b/llvm/test/CodeGen/Thumb2/cmpxchg.mir
index 33de25d469a757..c1adb465380f8e 100644
--- a/llvm/test/CodeGen/Thumb2/cmpxchg.mir
+++ b/llvm/test/CodeGen/Thumb2/cmpxchg.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -o - %s -mtriple=thumbv7-unknown-linux-gnu -verify-machineinstrs -run-pass=arm-pseudo | FileCheck %s
+# RUN: llc -o - %s -mtriple=thumbv7eb-unknown-linux-gnu -verify-machineinstrs -run-pass=arm-pseudo | FileCheck %s
 ---
 name: func
 tracksRegLiveness: true
@@ -12,23 +13,23 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: .1:
     ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
-    ; CHECK-NEXT: liveins: $r4, $r5, $r3
+    ; CHECK-NEXT: liveins: $r4, $r5, $r2
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: $r0, $r1 = t2LDREXD $r3, 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: $r0, $r1 = t2LDREXD $r2, 14 /* CC::al */, $noreg
     ; CHECK-NEXT: tCMPhir killed $r0, $r4, 14 /* CC::al */, $noreg, implicit-def $cpsr
     ; CHECK-NEXT: tCMPhir killed $r1, $r5, 0 /* CC::eq */, killed $cpsr, implicit-def $cpsr
     ; CHECK-NEXT: tBcc %bb.3, 1 /* CC::ne */, killed $cpsr
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: .2:
     ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
-    ; CHECK-NEXT: liveins: $r4, $r5, $r3
+    ; CHECK-NEXT: liveins: $r4, $r5, $r2
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: early-clobber $r2 = t2STREXD $r4, $r5, $r3, 14 /* CC::al */, $noreg
-    ; CHECK-NEXT: t2CMPri killed $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    ; CHECK-NEXT: early-clobber $r3 = t2STREXD $r4, $r5, $r2, 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: t2CMPri killed $r3, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
     ; CHECK-NEXT: tBcc %bb.1, 1 /* CC::ne */, killed $cpsr
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: .3:
-    dead early-clobber renamable $r0_r1, dead early-clobber renamable $r2 = CMP_SWAP_64 killed renamable $r3, killed renamable $r4_r5, renamable $r4_r5 :: (volatile load store monotonic monotonic (s64))
+    dead early-clobber renamable $r0_r1, dead early-clobber renamable $r2_r3 = CMP_SWAP_64 killed renamable $r2_r3, killed renamable $r4_r5, renamable $r4_r5 :: (volatile load store monotonic monotonic (s64))
 ...
 ---
 name: func2



More information about the llvm-commits mailing list