[llvm] e225764 - [ARM] Update atomic tests for D129695. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 19 11:36:14 PDT 2022


Author: David Green
Date: 2022-07-19T19:36:08+01:00
New Revision: e22576455f9de897802b12f721bc6bf15398e70a

URL: https://github.com/llvm/llvm-project/commit/e22576455f9de897802b12f721bc6bf15398e70a
DIFF: https://github.com/llvm/llvm-project/commit/e22576455f9de897802b12f721bc6bf15398e70a.diff

LOG: [ARM] Update atomic tests for D129695. NFC

Added: 
    

Modified: 
    llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
    llvm/test/CodeGen/ARM/cmpxchg.mir

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
index dfb4af7229545..bd9a23e693cfb 100644
--- a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
+++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
@@ -1,12 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; Test the instruction sequences produced by atomicrmw instructions. In
 ; particular, ensure there are no stores/spills inserted between the exclusive
 ; load and stores, which would invalidate the exclusive monitor.
 
-; RUN: llc -mtriple=armv8-unknown-none-eabi -O0 -o - %s | FileCheck %s --check-prefix=COMMON --check-prefix=EXPAND32 --check-prefix=EXPAND64
-; RUN: llc -mtriple=armv6-unknown-none-eabi -O0 -o - %s | FileCheck %s --check-prefix=COMMON --check-prefix=EXPAND32 --check-prefix=EXPAND64
-; RUN: llc -mtriple=thumbv7-unknown-none-eabi -O0 -o - %s | FileCheck %s --check-prefix=COMMON --check-prefix=EXPAND32 --check-prefix=EXPAND64
-; RUN: llc -mtriple=thumbv6-unknown-none-eabi -O0 -o - %s | FileCheck %s --check-prefix=COMMON --check-prefix=THUMB1
-; RUN: llc -mtriple=thumbv8m.base-unknown-none-eabi -O0 -o - %s | FileCheck %s --check-prefix=COMMON --check-prefix=EXPAND32 --check-prefix=BASELINE64
+; RUN: llc -mtriple=armv8-unknown-none-eabi -O0 -o - %s | FileCheck %s --check-prefix=CHECK-ARM8
+; RUN: llc -mtriple=armv6-unknown-none-eabi -O0 -o - %s | FileCheck %s --check-prefix=CHECK-ARM6
+; RUN: llc -mtriple=thumbv7-unknown-none-eabi -O0 -o - %s | FileCheck %s --check-prefix=CHECK-THUMB7
+; RUN: llc -mtriple=thumbv6-unknown-none-eabi -O0 -o - %s | FileCheck %s --check-prefix=CHECK-THUMB6
+; RUN: llc -mtriple=thumbv8m.base-unknown-none-eabi -O0 -o - %s | FileCheck %s --check-prefix=CHECK-THUMB8BASE
 
 @atomic_i8 = external global i8
 @atomic_i16 = external global i16
@@ -14,111 +15,2286 @@
 @atomic_i64 = external global i64
 
 define i8 @test_xchg_i8() {
-; COMMON-LABEL: test_xchg_i8:
-; EXPAND32: ldrexb
-; EXPAND32-NOT: str
-; EXPAND32: strexb
-; THUMB1: bl __sync_lock_test_and_set_1
+; CHECK-ARM8-LABEL: test_xchg_i8:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    ldrb r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB0_1
+; CHECK-ARM8-NEXT:  .LBB0_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB0_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:  .LBB0_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB0_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB0_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB0_2 Depth=2
+; CHECK-ARM8-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB0_2
+; CHECK-ARM8-NEXT:  .LBB0_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB0_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB0_1
+; CHECK-ARM8-NEXT:    b .LBB0_5
+; CHECK-ARM8-NEXT:  .LBB0_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_xchg_i8:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI0_0
+; CHECK-ARM6-NEXT:    ldrb r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB0_1
+; CHECK-ARM6-NEXT:  .LBB0_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB0_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI0_0
+; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:  .LBB0_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB0_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB0_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB0_2 Depth=2
+; CHECK-ARM6-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB0_2
+; CHECK-ARM6-NEXT:  .LBB0_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB0_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB0_1
+; CHECK-ARM6-NEXT:    b .LBB0_5
+; CHECK-ARM6-NEXT:  .LBB0_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI0_0:
+; CHECK-ARM6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB7-LABEL: test_xchg_i8:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB0_1
+; CHECK-THUMB7-NEXT:  .LBB0_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB0_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:  .LBB0_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB0_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB0_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB0_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexb r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB0_2
+; CHECK-THUMB7-NEXT:  .LBB0_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB0_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB0_1
+; CHECK-THUMB7-NEXT:    b .LBB0_5
+; CHECK-THUMB7-NEXT:  .LBB0_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_xchg_i8:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI0_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_lock_test_and_set_1
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI0_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB8BASE-LABEL: test_xchg_i8:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB0_1
+; CHECK-THUMB8BASE-NEXT:  .LBB0_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB0_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movs r4, #1
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB0_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB0_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB0_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB0_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexb r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB0_2
+; CHECK-THUMB8BASE-NEXT:  .LBB0_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB0_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB0_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB0_5
+; CHECK-THUMB8BASE-NEXT:  .LBB0_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw xchg i8* @atomic_i8, i8 1 monotonic
   ret i8 %0
 }
 define i8 @test_add_i8() {
-; COMMON-LABEL: test_add_i8:
-; EXPAND32: ldrexb
-; EXPAND32-NOT: str
-; EXPAND32: strexb
-; THUMB1: bl __sync_fetch_and_add_1
+; CHECK-ARM8-LABEL: test_add_i8:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    ldrb r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB1_1
+; CHECK-ARM8-NEXT:  .LBB1_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB1_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add r12, r1, #1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:  .LBB1_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB1_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB1_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB1_2 Depth=2
+; CHECK-ARM8-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB1_2
+; CHECK-ARM8-NEXT:  .LBB1_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB1_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB1_1
+; CHECK-ARM8-NEXT:    b .LBB1_5
+; CHECK-ARM8-NEXT:  .LBB1_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_add_i8:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI1_0
+; CHECK-ARM6-NEXT:    ldrb r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB1_1
+; CHECK-ARM6-NEXT:  .LBB1_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB1_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add r12, r1, #1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI1_0
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:  .LBB1_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB1_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB1_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB1_2 Depth=2
+; CHECK-ARM6-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB1_2
+; CHECK-ARM6-NEXT:  .LBB1_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB1_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB1_1
+; CHECK-ARM6-NEXT:    b .LBB1_5
+; CHECK-ARM6-NEXT:  .LBB1_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI1_0:
+; CHECK-ARM6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB7-LABEL: test_add_i8:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB1_1
+; CHECK-THUMB7-NEXT:  .LBB1_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB1_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add.w r12, r1, #1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:  .LBB1_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB1_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB1_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB1_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexb r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB1_2
+; CHECK-THUMB7-NEXT:  .LBB1_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB1_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB1_1
+; CHECK-THUMB7-NEXT:    b .LBB1_5
+; CHECK-THUMB7-NEXT:  .LBB1_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_add_i8:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI1_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_add_1
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI1_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB8BASE-LABEL: test_add_i8:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB1_1
+; CHECK-THUMB8BASE-NEXT:  .LBB1_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB1_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    adds r4, r1, #1
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB1_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB1_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB1_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB1_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexb r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB1_2
+; CHECK-THUMB8BASE-NEXT:  .LBB1_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB1_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB1_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB1_5
+; CHECK-THUMB8BASE-NEXT:  .LBB1_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw add i8* @atomic_i8, i8 1 monotonic
   ret i8 %0
 }
 define i8 @test_sub_i8() {
-; COMMON-LABEL: test_sub_i8:
-; EXPAND32: ldrexb
-; EXPAND32-NOT: str
-; EXPAND32: strexb
-; THUMB1: bl __sync_fetch_and_sub_1
+; CHECK-ARM8-LABEL: test_sub_i8:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    ldrb r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB2_1
+; CHECK-ARM8-NEXT:  .LBB2_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB2_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    sub r12, r1, #1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:  .LBB2_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB2_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB2_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB2_2 Depth=2
+; CHECK-ARM8-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB2_2
+; CHECK-ARM8-NEXT:  .LBB2_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB2_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB2_1
+; CHECK-ARM8-NEXT:    b .LBB2_5
+; CHECK-ARM8-NEXT:  .LBB2_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_sub_i8:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI2_0
+; CHECK-ARM6-NEXT:    ldrb r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB2_1
+; CHECK-ARM6-NEXT:  .LBB2_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB2_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    sub r12, r1, #1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI2_0
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:  .LBB2_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB2_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB2_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB2_2 Depth=2
+; CHECK-ARM6-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB2_2
+; CHECK-ARM6-NEXT:  .LBB2_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB2_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB2_1
+; CHECK-ARM6-NEXT:    b .LBB2_5
+; CHECK-ARM6-NEXT:  .LBB2_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI2_0:
+; CHECK-ARM6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB7-LABEL: test_sub_i8:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB2_1
+; CHECK-THUMB7-NEXT:  .LBB2_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB2_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    sub.w r12, r1, #1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:  .LBB2_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB2_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB2_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB2_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexb r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB2_2
+; CHECK-THUMB7-NEXT:  .LBB2_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB2_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB2_1
+; CHECK-THUMB7-NEXT:    b .LBB2_5
+; CHECK-THUMB7-NEXT:  .LBB2_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_sub_i8:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI2_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_sub_1
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI2_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB8BASE-LABEL: test_sub_i8:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB2_1
+; CHECK-THUMB8BASE-NEXT:  .LBB2_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB2_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    subs r4, r1, #1
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB2_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB2_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB2_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB2_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexb r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB2_2
+; CHECK-THUMB8BASE-NEXT:  .LBB2_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB2_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB2_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB2_5
+; CHECK-THUMB8BASE-NEXT:  .LBB2_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw sub i8* @atomic_i8, i8 1 monotonic
   ret i8 %0
 }
 define i8 @test_and_i8() {
-; COMMON-LABEL: test_and_i8:
-; EXPAND32: ldrexb
-; EXPAND32-NOT: str
-; EXPAND32: strexb
-; THUMB1: bl __sync_fetch_and_and_1
+; CHECK-ARM8-LABEL: test_and_i8:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    ldrb r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB3_1
+; CHECK-ARM8-NEXT:  .LBB3_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB3_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    and r12, r1, #1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:  .LBB3_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB3_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB3_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB3_2 Depth=2
+; CHECK-ARM8-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB3_2
+; CHECK-ARM8-NEXT:  .LBB3_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB3_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB3_1
+; CHECK-ARM8-NEXT:    b .LBB3_5
+; CHECK-ARM8-NEXT:  .LBB3_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_and_i8:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI3_0
+; CHECK-ARM6-NEXT:    ldrb r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB3_1
+; CHECK-ARM6-NEXT:  .LBB3_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB3_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    and r12, r1, #1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI3_0
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:  .LBB3_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB3_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB3_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB3_2 Depth=2
+; CHECK-ARM6-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB3_2
+; CHECK-ARM6-NEXT:  .LBB3_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB3_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB3_1
+; CHECK-ARM6-NEXT:    b .LBB3_5
+; CHECK-ARM6-NEXT:  .LBB3_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI3_0:
+; CHECK-ARM6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB7-LABEL: test_and_i8:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB3_1
+; CHECK-THUMB7-NEXT:  .LBB3_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB3_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    and r12, r1, #1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:  .LBB3_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB3_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB3_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB3_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexb r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB3_2
+; CHECK-THUMB7-NEXT:  .LBB3_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB3_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB3_1
+; CHECK-THUMB7-NEXT:    b .LBB3_5
+; CHECK-THUMB7-NEXT:  .LBB3_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_and_i8:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI3_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_and_1
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI3_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB8BASE-LABEL: test_and_i8:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB3_1
+; CHECK-THUMB8BASE-NEXT:  .LBB3_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB3_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movs r0, #1
+; CHECK-THUMB8BASE-NEXT:    mov r4, r1
+; CHECK-THUMB8BASE-NEXT:    ands r4, r0
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB3_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB3_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB3_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB3_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexb r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB3_2
+; CHECK-THUMB8BASE-NEXT:  .LBB3_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB3_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB3_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB3_5
+; CHECK-THUMB8BASE-NEXT:  .LBB3_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw and i8* @atomic_i8, i8 1 monotonic
   ret i8 %0
 }
 define i8 @test_nand_i8() {
-; COMMON-LABEL: test_nand_i8:
-; EXPAND32: ldrexb
-; EXPAND32-NOT: str
-; EXPAND32: strexb
-; THUMB1: bl __sync_fetch_and_nand_1
+; CHECK-ARM8-LABEL: test_nand_i8:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    ldrb r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB4_1
+; CHECK-ARM8-NEXT:  .LBB4_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB4_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    mvn r0, r1
+; CHECK-ARM8-NEXT:    mvn r2, #1
+; CHECK-ARM8-NEXT:    orr r12, r0, r2
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:  .LBB4_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB4_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB4_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB4_2 Depth=2
+; CHECK-ARM8-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB4_2
+; CHECK-ARM8-NEXT:  .LBB4_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB4_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB4_1
+; CHECK-ARM8-NEXT:    b .LBB4_5
+; CHECK-ARM8-NEXT:  .LBB4_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_nand_i8:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI4_0
+; CHECK-ARM6-NEXT:    ldrb r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB4_1
+; CHECK-ARM6-NEXT:  .LBB4_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB4_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    mvn r0, r1
+; CHECK-ARM6-NEXT:    mvn r2, #1
+; CHECK-ARM6-NEXT:    orr r12, r0, r2
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI4_0
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:  .LBB4_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB4_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB4_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB4_2 Depth=2
+; CHECK-ARM6-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB4_2
+; CHECK-ARM6-NEXT:  .LBB4_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB4_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB4_1
+; CHECK-ARM6-NEXT:    b .LBB4_5
+; CHECK-ARM6-NEXT:  .LBB4_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI4_0:
+; CHECK-ARM6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB7-LABEL: test_nand_i8:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB4_1
+; CHECK-THUMB7-NEXT:  .LBB4_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB4_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    mvn r0, #1
+; CHECK-THUMB7-NEXT:    orn r12, r0, r1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:  .LBB4_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB4_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB4_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB4_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexb r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB4_2
+; CHECK-THUMB7-NEXT:  .LBB4_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB4_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB4_1
+; CHECK-THUMB7-NEXT:    b .LBB4_5
+; CHECK-THUMB7-NEXT:  .LBB4_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_nand_i8:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI4_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_nand_1
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI4_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB8BASE-LABEL: test_nand_i8:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB4_1
+; CHECK-THUMB8BASE-NEXT:  .LBB4_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB4_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    mvns r4, r1
+; CHECK-THUMB8BASE-NEXT:    movs r0, #1
+; CHECK-THUMB8BASE-NEXT:    mvns r0, r0
+; CHECK-THUMB8BASE-NEXT:    orrs r4, r0
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB4_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB4_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB4_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB4_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexb r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB4_2
+; CHECK-THUMB8BASE-NEXT:  .LBB4_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB4_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB4_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB4_5
+; CHECK-THUMB8BASE-NEXT:  .LBB4_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw nand i8* @atomic_i8, i8 1 monotonic
   ret i8 %0
 }
 define i8 @test_or_i8() {
-; COMMON-LABEL: test_or_i8:
-; EXPAND32: ldrexb
-; EXPAND32-NOT: str
-; EXPAND32: strexb
-; THUMB1: bl __sync_fetch_and_or_1
+; CHECK-ARM8-LABEL: test_or_i8:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    ldrb r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB5_1
+; CHECK-ARM8-NEXT:  .LBB5_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB5_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    orr r12, r1, #1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:  .LBB5_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB5_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB5_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB5_2 Depth=2
+; CHECK-ARM8-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB5_2
+; CHECK-ARM8-NEXT:  .LBB5_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB5_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB5_1
+; CHECK-ARM8-NEXT:    b .LBB5_5
+; CHECK-ARM8-NEXT:  .LBB5_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_or_i8:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI5_0
+; CHECK-ARM6-NEXT:    ldrb r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB5_1
+; CHECK-ARM6-NEXT:  .LBB5_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB5_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    orr r12, r1, #1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI5_0
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:  .LBB5_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB5_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB5_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB5_2 Depth=2
+; CHECK-ARM6-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB5_2
+; CHECK-ARM6-NEXT:  .LBB5_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB5_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB5_1
+; CHECK-ARM6-NEXT:    b .LBB5_5
+; CHECK-ARM6-NEXT:  .LBB5_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI5_0:
+; CHECK-ARM6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB7-LABEL: test_or_i8:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB5_1
+; CHECK-THUMB7-NEXT:  .LBB5_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB5_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    orr r12, r1, #1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:  .LBB5_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB5_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB5_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB5_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexb r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB5_2
+; CHECK-THUMB7-NEXT:  .LBB5_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB5_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB5_1
+; CHECK-THUMB7-NEXT:    b .LBB5_5
+; CHECK-THUMB7-NEXT:  .LBB5_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_or_i8:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI5_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_or_1
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI5_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB8BASE-LABEL: test_or_i8:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB5_1
+; CHECK-THUMB8BASE-NEXT:  .LBB5_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB5_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movs r0, #1
+; CHECK-THUMB8BASE-NEXT:    mov r4, r1
+; CHECK-THUMB8BASE-NEXT:    orrs r4, r0
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB5_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB5_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB5_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB5_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexb r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB5_2
+; CHECK-THUMB8BASE-NEXT:  .LBB5_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB5_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB5_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB5_5
+; CHECK-THUMB8BASE-NEXT:  .LBB5_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw or i8* @atomic_i8, i8 1 monotonic
   ret i8 %0
 }
 define i8 @test_xor_i8() {
-; COMMON-LABEL: test_xor_i8:
-; EXPAND32: ldrexb
-; EXPAND32-NOT: str
-; EXPAND32: strexb
-; THUMB1: bl __sync_fetch_and_xor_1
+; CHECK-ARM8-LABEL: test_xor_i8:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    ldrb r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB6_1
+; CHECK-ARM8-NEXT:  .LBB6_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB6_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    eor r12, r1, #1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:  .LBB6_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB6_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB6_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB6_2 Depth=2
+; CHECK-ARM8-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB6_2
+; CHECK-ARM8-NEXT:  .LBB6_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB6_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB6_1
+; CHECK-ARM8-NEXT:    b .LBB6_5
+; CHECK-ARM8-NEXT:  .LBB6_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_xor_i8:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI6_0
+; CHECK-ARM6-NEXT:    ldrb r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB6_1
+; CHECK-ARM6-NEXT:  .LBB6_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB6_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    eor r12, r1, #1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI6_0
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:  .LBB6_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB6_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB6_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB6_2 Depth=2
+; CHECK-ARM6-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB6_2
+; CHECK-ARM6-NEXT:  .LBB6_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB6_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB6_1
+; CHECK-ARM6-NEXT:    b .LBB6_5
+; CHECK-ARM6-NEXT:  .LBB6_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI6_0:
+; CHECK-ARM6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB7-LABEL: test_xor_i8:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB6_1
+; CHECK-THUMB7-NEXT:  .LBB6_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB6_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    eor r12, r1, #1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:  .LBB6_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB6_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB6_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB6_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexb r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB6_2
+; CHECK-THUMB7-NEXT:  .LBB6_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB6_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB6_1
+; CHECK-THUMB7-NEXT:    b .LBB6_5
+; CHECK-THUMB7-NEXT:  .LBB6_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_xor_i8:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI6_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_xor_1
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI6_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB8BASE-LABEL: test_xor_i8:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB6_1
+; CHECK-THUMB8BASE-NEXT:  .LBB6_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB6_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movs r0, #1
+; CHECK-THUMB8BASE-NEXT:    mov r4, r1
+; CHECK-THUMB8BASE-NEXT:    eors r4, r0
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB6_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB6_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB6_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB6_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexb r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB6_2
+; CHECK-THUMB8BASE-NEXT:  .LBB6_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB6_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB6_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB6_5
+; CHECK-THUMB8BASE-NEXT:  .LBB6_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw xor i8* @atomic_i8, i8 1 monotonic
   ret i8 %0
 }
 define i8 @test_max_i8() {
-; COMMON-LABEL: test_max_i8:
-; EXPAND32: ldrexb
-; EXPAND32-NOT: str
-; EXPAND32: strexb
-; THUMB1: bl __sync_fetch_and_max_1
+; CHECK-ARM8-LABEL: test_max_i8:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    ldrb r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB7_1
+; CHECK-ARM8-NEXT:  .LBB7_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB7_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    sxtb r0, r1
+; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    cmp r0, #1
+; CHECK-ARM8-NEXT:    movgt r12, r1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:  .LBB7_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB7_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB7_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB7_2 Depth=2
+; CHECK-ARM8-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB7_2
+; CHECK-ARM8-NEXT:  .LBB7_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB7_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB7_1
+; CHECK-ARM8-NEXT:    b .LBB7_5
+; CHECK-ARM8-NEXT:  .LBB7_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_max_i8:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI7_0
+; CHECK-ARM6-NEXT:    ldrb r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB7_1
+; CHECK-ARM6-NEXT:  .LBB7_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB7_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    sxtb r0, r1
+; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    cmp r0, #1
+; CHECK-ARM6-NEXT:    movgt r12, r1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI7_0
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:  .LBB7_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB7_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB7_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB7_2 Depth=2
+; CHECK-ARM6-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB7_2
+; CHECK-ARM6-NEXT:  .LBB7_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB7_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB7_1
+; CHECK-ARM6-NEXT:    b .LBB7_5
+; CHECK-ARM6-NEXT:  .LBB7_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI7_0:
+; CHECK-ARM6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB7-LABEL: test_max_i8:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB7_1
+; CHECK-THUMB7-NEXT:  .LBB7_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB7_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    sxtb r0, r1
+; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r0, #1
+; CHECK-THUMB7-NEXT:    it gt
+; CHECK-THUMB7-NEXT:    movgt r12, r1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:  .LBB7_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB7_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB7_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB7_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexb r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB7_2
+; CHECK-THUMB7-NEXT:  .LBB7_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB7_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB7_1
+; CHECK-THUMB7-NEXT:    b .LBB7_5
+; CHECK-THUMB7-NEXT:  .LBB7_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_max_i8:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI7_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_max_1
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI7_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB8BASE-LABEL: test_max_i8:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #20
+; CHECK-THUMB8BASE-NEXT:    sub sp, #20
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB7_1
+; CHECK-THUMB8BASE-NEXT:  .LBB7_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB7_4 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    sxtb r1, r0
+; CHECK-THUMB8BASE-NEXT:    movs r2, #1
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bgt .LBB7_3
+; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB7_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB7_3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB7_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB7_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB7_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB7_6
+; CHECK-THUMB8BASE-NEXT:  @ %bb.5: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB7_4 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexb r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB7_4
+; CHECK-THUMB8BASE-NEXT:  .LBB7_6: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB7_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB7_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB7_7
+; CHECK-THUMB8BASE-NEXT:  .LBB7_7: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #20
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw max i8* @atomic_i8, i8 1 monotonic
   ret i8 %0
 }
 define i8 @test_min_i8() {
-; COMMON-LABEL: test_min_i8:
-; EXPAND32: ldrexb
-; EXPAND32-NOT: str
-; EXPAND32: strexb
-; THUMB1: bl __sync_fetch_and_min_1
+; CHECK-ARM8-LABEL: test_min_i8:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    ldrb r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB8_1
+; CHECK-ARM8-NEXT:  .LBB8_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB8_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    sxtb r0, r1
+; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    cmp r0, #2
+; CHECK-ARM8-NEXT:    movlt r12, r1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:  .LBB8_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB8_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB8_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB8_2 Depth=2
+; CHECK-ARM8-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB8_2
+; CHECK-ARM8-NEXT:  .LBB8_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB8_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxtb r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB8_1
+; CHECK-ARM8-NEXT:    b .LBB8_5
+; CHECK-ARM8-NEXT:  .LBB8_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_min_i8:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI8_0
+; CHECK-ARM6-NEXT:    ldrb r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB8_1
+; CHECK-ARM6-NEXT:  .LBB8_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB8_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    sxtb r0, r1
+; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    cmp r0, #2
+; CHECK-ARM6-NEXT:    movlt r12, r1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI8_0
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:  .LBB8_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB8_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB8_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB8_2 Depth=2
+; CHECK-ARM6-NEXT:    strexb r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB8_2
+; CHECK-ARM6-NEXT:  .LBB8_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB8_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxtb r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB8_1
+; CHECK-ARM6-NEXT:    b .LBB8_5
+; CHECK-ARM6-NEXT:  .LBB8_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI8_0:
+; CHECK-ARM6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB7-LABEL: test_min_i8:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB8_1
+; CHECK-THUMB7-NEXT:  .LBB8_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB8_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    sxtb r0, r1
+; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r0, #2
+; CHECK-THUMB7-NEXT:    it lt
+; CHECK-THUMB7-NEXT:    movlt r12, r1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:  .LBB8_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB8_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB8_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB8_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexb r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB8_2
+; CHECK-THUMB7-NEXT:  .LBB8_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB8_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxtb r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB8_1
+; CHECK-THUMB7-NEXT:    b .LBB8_5
+; CHECK-THUMB7-NEXT:  .LBB8_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_min_i8:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI8_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_min_1
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI8_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB8BASE-LABEL: test_min_i8:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #20
+; CHECK-THUMB8BASE-NEXT:    sub sp, #20
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB8_1
+; CHECK-THUMB8BASE-NEXT:  .LBB8_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB8_4 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    sxtb r1, r0
+; CHECK-THUMB8BASE-NEXT:    movs r2, #1
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #2
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    blt .LBB8_3
+; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB8_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB8_3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB8_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB8_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB8_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB8_6
+; CHECK-THUMB8BASE-NEXT:  @ %bb.5: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB8_4 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexb r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB8_4
+; CHECK-THUMB8BASE-NEXT:  .LBB8_6: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB8_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB8_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB8_7
+; CHECK-THUMB8BASE-NEXT:  .LBB8_7: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #20
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw min i8* @atomic_i8, i8 1 monotonic
   ret i8 %0
 }
 define i8 @test_umax_i8() {
-; COMMON-LABEL: test_umax_i8:
-; EXPAND32: ldrexb
-; EXPAND32-NOT: str
-; EXPAND32: strexb
-; THUMB1: bl __sync_fetch_and_umax_1
+; CHECK-ARM8-LABEL: test_umax_i8:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .save {r11, lr}
+; CHECK-ARM8-NEXT:    push {r11, lr}
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    ldrb r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB9_1
+; CHECK-ARM8-NEXT:  .LBB9_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB9_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    uxtb r1, r12
+; CHECK-ARM8-NEXT:    mov lr, #1
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    movhi lr, r12
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    uxtb r12, r12
+; CHECK-ARM8-NEXT:  .LBB9_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB9_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r12
+; CHECK-ARM8-NEXT:    bne .LBB9_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB9_2 Depth=2
+; CHECK-ARM8-NEXT:    strexb r2, lr, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB9_2
+; CHECK-ARM8-NEXT:  .LBB9_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB9_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB9_1
+; CHECK-ARM8-NEXT:    b .LBB9_5
+; CHECK-ARM8-NEXT:  .LBB9_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    pop {r11, pc}
+;
+; CHECK-ARM6-LABEL: test_umax_i8:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .save {r11, lr}
+; CHECK-ARM6-NEXT:    push {r11, lr}
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI9_0
+; CHECK-ARM6-NEXT:    ldrb r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB9_1
+; CHECK-ARM6-NEXT:  .LBB9_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB9_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    uxtb r1, r12
+; CHECK-ARM6-NEXT:    mov lr, #1
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    movhi lr, r12
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI9_0
+; CHECK-ARM6-NEXT:    uxtb r12, r12
+; CHECK-ARM6-NEXT:  .LBB9_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB9_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r12
+; CHECK-ARM6-NEXT:    bne .LBB9_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB9_2 Depth=2
+; CHECK-ARM6-NEXT:    strexb r2, lr, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB9_2
+; CHECK-ARM6-NEXT:  .LBB9_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB9_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB9_1
+; CHECK-ARM6-NEXT:    b .LBB9_5
+; CHECK-ARM6-NEXT:  .LBB9_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    pop {r11, pc}
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI9_0:
+; CHECK-ARM6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB7-LABEL: test_umax_i8:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .save {r4, lr}
+; CHECK-THUMB7-NEXT:    push {r4, lr}
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB9_1
+; CHECK-THUMB7-NEXT:  .LBB9_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB9_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    uxtb r1, r4
+; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    it hi
+; CHECK-THUMB7-NEXT:    movhi r12, r4
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    uxtb r4, r4
+; CHECK-THUMB7-NEXT:  .LBB9_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB9_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r4
+; CHECK-THUMB7-NEXT:    bne .LBB9_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB9_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexb r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB9_2
+; CHECK-THUMB7-NEXT:  .LBB9_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB9_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB9_1
+; CHECK-THUMB7-NEXT:    b .LBB9_5
+; CHECK-THUMB7-NEXT:  .LBB9_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    pop {r4, pc}
+;
+; CHECK-THUMB6-LABEL: test_umax_i8:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI9_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_umax_1
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI9_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB8BASE-LABEL: test_umax_i8:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, r5, r7, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #24
+; CHECK-THUMB8BASE-NEXT:    sub sp, #24
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB9_1
+; CHECK-THUMB8BASE-NEXT:  .LBB9_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB9_4 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r0
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    movs r2, #1
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bhi .LBB9_3
+; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB9_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB9_3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB9_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r5, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    uxtb r4, r4
+; CHECK-THUMB8BASE-NEXT:  .LBB9_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB9_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r4
+; CHECK-THUMB8BASE-NEXT:    bne .LBB9_6
+; CHECK-THUMB8BASE-NEXT:  @ %bb.5: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB9_4 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexb r2, r5, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB9_4
+; CHECK-THUMB8BASE-NEXT:  .LBB9_6: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB9_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB9_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB9_7
+; CHECK-THUMB8BASE-NEXT:  .LBB9_7: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #24
+; CHECK-THUMB8BASE-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %0 = atomicrmw umax i8* @atomic_i8, i8 1 monotonic
   ret i8 %0
 }
 define i8 @test_umin_i8() {
-; COMMON-LABEL: test_umin_i8:
-; EXPAND32: ldrexb
-; EXPAND32-NOT: str
-; EXPAND32: strexb
-; THUMB1: bl __sync_fetch_and_umin_1
+; CHECK-ARM8-LABEL: test_umin_i8:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .save {r11, lr}
+; CHECK-ARM8-NEXT:    push {r11, lr}
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    ldrb r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB10_1
+; CHECK-ARM8-NEXT:  .LBB10_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB10_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    uxtb r1, r12
+; CHECK-ARM8-NEXT:    mov lr, #1
+; CHECK-ARM8-NEXT:    cmp r1, #2
+; CHECK-ARM8-NEXT:    movlo lr, r12
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-ARM8-NEXT:    uxtb r12, r12
+; CHECK-ARM8-NEXT:  .LBB10_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB10_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r12
+; CHECK-ARM8-NEXT:    bne .LBB10_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB10_2 Depth=2
+; CHECK-ARM8-NEXT:    strexb r2, lr, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB10_2
+; CHECK-ARM8-NEXT:  .LBB10_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB10_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB10_1
+; CHECK-ARM8-NEXT:    b .LBB10_5
+; CHECK-ARM8-NEXT:  .LBB10_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    pop {r11, pc}
+;
+; CHECK-ARM6-LABEL: test_umin_i8:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .save {r11, lr}
+; CHECK-ARM6-NEXT:    push {r11, lr}
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI10_0
+; CHECK-ARM6-NEXT:    ldrb r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB10_1
+; CHECK-ARM6-NEXT:  .LBB10_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB10_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    uxtb r1, r12
+; CHECK-ARM6-NEXT:    mov lr, #1
+; CHECK-ARM6-NEXT:    cmp r1, #2
+; CHECK-ARM6-NEXT:    movlo lr, r12
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI10_0
+; CHECK-ARM6-NEXT:    uxtb r12, r12
+; CHECK-ARM6-NEXT:  .LBB10_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB10_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexb r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r12
+; CHECK-ARM6-NEXT:    bne .LBB10_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB10_2 Depth=2
+; CHECK-ARM6-NEXT:    strexb r2, lr, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB10_2
+; CHECK-ARM6-NEXT:  .LBB10_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB10_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB10_1
+; CHECK-ARM6-NEXT:    b .LBB10_5
+; CHECK-ARM6-NEXT:  .LBB10_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    pop {r11, pc}
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI10_0:
+; CHECK-ARM6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB7-LABEL: test_umin_i8:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .save {r4, lr}
+; CHECK-THUMB7-NEXT:    push {r4, lr}
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB10_1
+; CHECK-THUMB7-NEXT:  .LBB10_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB10_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    uxtb r1, r4
+; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r1, #2
+; CHECK-THUMB7-NEXT:    it lo
+; CHECK-THUMB7-NEXT:    movlo r12, r4
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB7-NEXT:    uxtb r4, r4
+; CHECK-THUMB7-NEXT:  .LBB10_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB10_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r4
+; CHECK-THUMB7-NEXT:    bne .LBB10_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB10_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexb r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB10_2
+; CHECK-THUMB7-NEXT:  .LBB10_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB10_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB10_1
+; CHECK-THUMB7-NEXT:    b .LBB10_5
+; CHECK-THUMB7-NEXT:  .LBB10_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    pop {r4, pc}
+;
+; CHECK-THUMB6-LABEL: test_umin_i8:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI10_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_umin_1
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI10_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i8
+;
+; CHECK-THUMB8BASE-LABEL: test_umin_i8:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, r5, r7, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #24
+; CHECK-THUMB8BASE-NEXT:    sub sp, #24
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    ldrb r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB10_1
+; CHECK-THUMB8BASE-NEXT:  .LBB10_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB10_4 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxtb r1, r0
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    movs r2, #1
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #2
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    blo .LBB10_3
+; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB10_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB10_3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB10_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r5, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i8
+; CHECK-THUMB8BASE-NEXT:    uxtb r4, r4
+; CHECK-THUMB8BASE-NEXT:  .LBB10_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB10_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexb r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r4
+; CHECK-THUMB8BASE-NEXT:    bne .LBB10_6
+; CHECK-THUMB8BASE-NEXT:  @ %bb.5: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB10_4 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexb r2, r5, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB10_4
+; CHECK-THUMB8BASE-NEXT:  .LBB10_6: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB10_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB10_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB10_7
+; CHECK-THUMB8BASE-NEXT:  .LBB10_7: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #24
+; CHECK-THUMB8BASE-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %0 = atomicrmw umin i8* @atomic_i8, i8 1 monotonic
   ret i8 %0
@@ -126,111 +2302,2286 @@ entry:
 
 
 define i16 @test_xchg_i16() {
-; COMMON-LABEL: test_xchg_i16:
-; EXPAND32: ldrexh
-; EXPAND32-NOT: str
-; EXPAND32: strexh
-; THUMB1: bl __sync_lock_test_and_set_2
+; CHECK-ARM8-LABEL: test_xchg_i16:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    ldrh r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB11_1
+; CHECK-ARM8-NEXT:  .LBB11_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB11_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:  .LBB11_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB11_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB11_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB11_2 Depth=2
+; CHECK-ARM8-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB11_2
+; CHECK-ARM8-NEXT:  .LBB11_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB11_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB11_1
+; CHECK-ARM8-NEXT:    b .LBB11_5
+; CHECK-ARM8-NEXT:  .LBB11_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_xchg_i16:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI11_0
+; CHECK-ARM6-NEXT:    ldrh r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB11_1
+; CHECK-ARM6-NEXT:  .LBB11_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB11_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI11_0
+; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:  .LBB11_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB11_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB11_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB11_2 Depth=2
+; CHECK-ARM6-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB11_2
+; CHECK-ARM6-NEXT:  .LBB11_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB11_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB11_1
+; CHECK-ARM6-NEXT:    b .LBB11_5
+; CHECK-ARM6-NEXT:  .LBB11_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI11_0:
+; CHECK-ARM6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB7-LABEL: test_xchg_i16:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB11_1
+; CHECK-THUMB7-NEXT:  .LBB11_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB11_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:  .LBB11_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB11_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB11_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB11_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexh r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB11_2
+; CHECK-THUMB7-NEXT:  .LBB11_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB11_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB11_1
+; CHECK-THUMB7-NEXT:    b .LBB11_5
+; CHECK-THUMB7-NEXT:  .LBB11_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_xchg_i16:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI11_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_lock_test_and_set_2
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI11_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB8BASE-LABEL: test_xchg_i16:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB11_1
+; CHECK-THUMB8BASE-NEXT:  .LBB11_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB11_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movs r4, #1
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB11_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB11_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB11_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB11_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexh r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB11_2
+; CHECK-THUMB8BASE-NEXT:  .LBB11_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB11_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB11_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB11_5
+; CHECK-THUMB8BASE-NEXT:  .LBB11_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw xchg i16* @atomic_i16, i16 1 monotonic
   ret i16 %0
 }
 define i16 @test_add_i16() {
-; COMMON-LABEL: test_add_i16:
-; EXPAND32: ldrexh
-; EXPAND32-NOT: str
-; EXPAND32: strexh
-; THUMB1: bl __sync_fetch_and_add_2
+; CHECK-ARM8-LABEL: test_add_i16:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    ldrh r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB12_1
+; CHECK-ARM8-NEXT:  .LBB12_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB12_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add r12, r1, #1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:  .LBB12_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB12_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB12_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB12_2 Depth=2
+; CHECK-ARM8-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB12_2
+; CHECK-ARM8-NEXT:  .LBB12_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB12_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB12_1
+; CHECK-ARM8-NEXT:    b .LBB12_5
+; CHECK-ARM8-NEXT:  .LBB12_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_add_i16:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI12_0
+; CHECK-ARM6-NEXT:    ldrh r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB12_1
+; CHECK-ARM6-NEXT:  .LBB12_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB12_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add r12, r1, #1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI12_0
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:  .LBB12_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB12_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB12_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB12_2 Depth=2
+; CHECK-ARM6-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB12_2
+; CHECK-ARM6-NEXT:  .LBB12_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB12_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB12_1
+; CHECK-ARM6-NEXT:    b .LBB12_5
+; CHECK-ARM6-NEXT:  .LBB12_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI12_0:
+; CHECK-ARM6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB7-LABEL: test_add_i16:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB12_1
+; CHECK-THUMB7-NEXT:  .LBB12_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB12_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add.w r12, r1, #1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:  .LBB12_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB12_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB12_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB12_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexh r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB12_2
+; CHECK-THUMB7-NEXT:  .LBB12_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB12_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB12_1
+; CHECK-THUMB7-NEXT:    b .LBB12_5
+; CHECK-THUMB7-NEXT:  .LBB12_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_add_i16:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI12_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_add_2
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI12_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB8BASE-LABEL: test_add_i16:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB12_1
+; CHECK-THUMB8BASE-NEXT:  .LBB12_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB12_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    adds r4, r1, #1
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB12_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB12_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB12_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB12_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexh r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB12_2
+; CHECK-THUMB8BASE-NEXT:  .LBB12_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB12_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB12_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB12_5
+; CHECK-THUMB8BASE-NEXT:  .LBB12_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw add i16* @atomic_i16, i16 1 monotonic
   ret i16 %0
 }
 define i16 @test_sub_i16() {
-; COMMON-LABEL: test_sub_i16:
-; EXPAND32: ldrexh
-; EXPAND32-NOT: str
-; EXPAND32: strexh
-; THUMB1: bl __sync_fetch_and_sub_2
+; CHECK-ARM8-LABEL: test_sub_i16:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    ldrh r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB13_1
+; CHECK-ARM8-NEXT:  .LBB13_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB13_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    sub r12, r1, #1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:  .LBB13_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB13_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB13_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB13_2 Depth=2
+; CHECK-ARM8-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB13_2
+; CHECK-ARM8-NEXT:  .LBB13_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB13_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB13_1
+; CHECK-ARM8-NEXT:    b .LBB13_5
+; CHECK-ARM8-NEXT:  .LBB13_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_sub_i16:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI13_0
+; CHECK-ARM6-NEXT:    ldrh r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB13_1
+; CHECK-ARM6-NEXT:  .LBB13_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB13_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    sub r12, r1, #1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI13_0
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:  .LBB13_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB13_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB13_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB13_2 Depth=2
+; CHECK-ARM6-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB13_2
+; CHECK-ARM6-NEXT:  .LBB13_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB13_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB13_1
+; CHECK-ARM6-NEXT:    b .LBB13_5
+; CHECK-ARM6-NEXT:  .LBB13_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI13_0:
+; CHECK-ARM6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB7-LABEL: test_sub_i16:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB13_1
+; CHECK-THUMB7-NEXT:  .LBB13_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB13_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    sub.w r12, r1, #1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:  .LBB13_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB13_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB13_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB13_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexh r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB13_2
+; CHECK-THUMB7-NEXT:  .LBB13_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB13_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB13_1
+; CHECK-THUMB7-NEXT:    b .LBB13_5
+; CHECK-THUMB7-NEXT:  .LBB13_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_sub_i16:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI13_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_sub_2
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI13_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB8BASE-LABEL: test_sub_i16:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB13_1
+; CHECK-THUMB8BASE-NEXT:  .LBB13_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB13_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    subs r4, r1, #1
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB13_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB13_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB13_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB13_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexh r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB13_2
+; CHECK-THUMB8BASE-NEXT:  .LBB13_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB13_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB13_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB13_5
+; CHECK-THUMB8BASE-NEXT:  .LBB13_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw sub i16* @atomic_i16, i16 1 monotonic
   ret i16 %0
 }
 define i16 @test_and_i16() {
-; COMMON-LABEL: test_and_i16:
-; EXPAND32: ldrexh
-; EXPAND32-NOT: str
-; EXPAND32: strexh
-; THUMB1: bl __sync_fetch_and_and_2
+; CHECK-ARM8-LABEL: test_and_i16:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    ldrh r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB14_1
+; CHECK-ARM8-NEXT:  .LBB14_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB14_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    and r12, r1, #1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:  .LBB14_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB14_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB14_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB14_2 Depth=2
+; CHECK-ARM8-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB14_2
+; CHECK-ARM8-NEXT:  .LBB14_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB14_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB14_1
+; CHECK-ARM8-NEXT:    b .LBB14_5
+; CHECK-ARM8-NEXT:  .LBB14_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_and_i16:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI14_0
+; CHECK-ARM6-NEXT:    ldrh r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB14_1
+; CHECK-ARM6-NEXT:  .LBB14_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB14_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    and r12, r1, #1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI14_0
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:  .LBB14_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB14_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB14_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB14_2 Depth=2
+; CHECK-ARM6-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB14_2
+; CHECK-ARM6-NEXT:  .LBB14_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB14_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB14_1
+; CHECK-ARM6-NEXT:    b .LBB14_5
+; CHECK-ARM6-NEXT:  .LBB14_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI14_0:
+; CHECK-ARM6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB7-LABEL: test_and_i16:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB14_1
+; CHECK-THUMB7-NEXT:  .LBB14_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB14_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    and r12, r1, #1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:  .LBB14_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB14_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB14_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB14_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexh r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB14_2
+; CHECK-THUMB7-NEXT:  .LBB14_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB14_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB14_1
+; CHECK-THUMB7-NEXT:    b .LBB14_5
+; CHECK-THUMB7-NEXT:  .LBB14_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_and_i16:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI14_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_and_2
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI14_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB8BASE-LABEL: test_and_i16:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB14_1
+; CHECK-THUMB8BASE-NEXT:  .LBB14_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB14_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movs r0, #1
+; CHECK-THUMB8BASE-NEXT:    mov r4, r1
+; CHECK-THUMB8BASE-NEXT:    ands r4, r0
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB14_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB14_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB14_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB14_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexh r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB14_2
+; CHECK-THUMB8BASE-NEXT:  .LBB14_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB14_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB14_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB14_5
+; CHECK-THUMB8BASE-NEXT:  .LBB14_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw and i16* @atomic_i16, i16 1 monotonic
   ret i16 %0
 }
 define i16 @test_nand_i16() {
-; COMMON-LABEL: test_nand_i16:
-; EXPAND32: ldrexh
-; EXPAND32-NOT: str
-; EXPAND32: strexh
-; THUMB1: bl __sync_fetch_and_nand_2
+; CHECK-ARM8-LABEL: test_nand_i16:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    ldrh r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB15_1
+; CHECK-ARM8-NEXT:  .LBB15_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB15_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    mvn r0, r1
+; CHECK-ARM8-NEXT:    mvn r2, #1
+; CHECK-ARM8-NEXT:    orr r12, r0, r2
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:  .LBB15_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB15_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB15_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB15_2 Depth=2
+; CHECK-ARM8-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB15_2
+; CHECK-ARM8-NEXT:  .LBB15_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB15_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB15_1
+; CHECK-ARM8-NEXT:    b .LBB15_5
+; CHECK-ARM8-NEXT:  .LBB15_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_nand_i16:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI15_0
+; CHECK-ARM6-NEXT:    ldrh r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB15_1
+; CHECK-ARM6-NEXT:  .LBB15_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB15_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    mvn r0, r1
+; CHECK-ARM6-NEXT:    mvn r2, #1
+; CHECK-ARM6-NEXT:    orr r12, r0, r2
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI15_0
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:  .LBB15_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB15_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB15_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB15_2 Depth=2
+; CHECK-ARM6-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB15_2
+; CHECK-ARM6-NEXT:  .LBB15_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB15_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB15_1
+; CHECK-ARM6-NEXT:    b .LBB15_5
+; CHECK-ARM6-NEXT:  .LBB15_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI15_0:
+; CHECK-ARM6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB7-LABEL: test_nand_i16:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB15_1
+; CHECK-THUMB7-NEXT:  .LBB15_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB15_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    mvn r0, #1
+; CHECK-THUMB7-NEXT:    orn r12, r0, r1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:  .LBB15_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB15_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB15_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB15_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexh r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB15_2
+; CHECK-THUMB7-NEXT:  .LBB15_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB15_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB15_1
+; CHECK-THUMB7-NEXT:    b .LBB15_5
+; CHECK-THUMB7-NEXT:  .LBB15_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_nand_i16:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI15_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_nand_2
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI15_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB8BASE-LABEL: test_nand_i16:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB15_1
+; CHECK-THUMB8BASE-NEXT:  .LBB15_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB15_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    mvns r4, r1
+; CHECK-THUMB8BASE-NEXT:    movs r0, #1
+; CHECK-THUMB8BASE-NEXT:    mvns r0, r0
+; CHECK-THUMB8BASE-NEXT:    orrs r4, r0
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB15_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB15_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB15_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB15_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexh r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB15_2
+; CHECK-THUMB8BASE-NEXT:  .LBB15_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB15_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB15_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB15_5
+; CHECK-THUMB8BASE-NEXT:  .LBB15_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw nand i16* @atomic_i16, i16 1 monotonic
   ret i16 %0
 }
 define i16 @test_or_i16() {
-; COMMON-LABEL: test_or_i16:
-; EXPAND32: ldrexh
-; EXPAND32-NOT: str
-; EXPAND32: strexh
-; THUMB1: bl __sync_fetch_and_or_2
+; CHECK-ARM8-LABEL: test_or_i16:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    ldrh r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB16_1
+; CHECK-ARM8-NEXT:  .LBB16_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB16_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    orr r12, r1, #1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:  .LBB16_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB16_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB16_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB16_2 Depth=2
+; CHECK-ARM8-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB16_2
+; CHECK-ARM8-NEXT:  .LBB16_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB16_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB16_1
+; CHECK-ARM8-NEXT:    b .LBB16_5
+; CHECK-ARM8-NEXT:  .LBB16_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_or_i16:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI16_0
+; CHECK-ARM6-NEXT:    ldrh r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB16_1
+; CHECK-ARM6-NEXT:  .LBB16_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB16_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    orr r12, r1, #1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI16_0
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:  .LBB16_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB16_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB16_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB16_2 Depth=2
+; CHECK-ARM6-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB16_2
+; CHECK-ARM6-NEXT:  .LBB16_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB16_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB16_1
+; CHECK-ARM6-NEXT:    b .LBB16_5
+; CHECK-ARM6-NEXT:  .LBB16_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI16_0:
+; CHECK-ARM6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB7-LABEL: test_or_i16:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB16_1
+; CHECK-THUMB7-NEXT:  .LBB16_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB16_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    orr r12, r1, #1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:  .LBB16_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB16_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB16_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB16_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexh r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB16_2
+; CHECK-THUMB7-NEXT:  .LBB16_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB16_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB16_1
+; CHECK-THUMB7-NEXT:    b .LBB16_5
+; CHECK-THUMB7-NEXT:  .LBB16_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_or_i16:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI16_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_or_2
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI16_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB8BASE-LABEL: test_or_i16:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB16_1
+; CHECK-THUMB8BASE-NEXT:  .LBB16_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB16_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movs r0, #1
+; CHECK-THUMB8BASE-NEXT:    mov r4, r1
+; CHECK-THUMB8BASE-NEXT:    orrs r4, r0
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB16_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB16_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB16_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB16_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexh r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB16_2
+; CHECK-THUMB8BASE-NEXT:  .LBB16_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB16_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB16_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB16_5
+; CHECK-THUMB8BASE-NEXT:  .LBB16_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw or i16* @atomic_i16, i16 1 monotonic
   ret i16 %0
 }
 define i16 @test_xor_i16() {
-; COMMON-LABEL: test_xor_i16:
-; EXPAND32: ldrexh
-; EXPAND32-NOT: str
-; EXPAND32: strexh
-; THUMB1: bl __sync_fetch_and_xor_2
+; CHECK-ARM8-LABEL: test_xor_i16:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    ldrh r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB17_1
+; CHECK-ARM8-NEXT:  .LBB17_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB17_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    eor r12, r1, #1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:  .LBB17_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB17_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB17_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB17_2 Depth=2
+; CHECK-ARM8-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB17_2
+; CHECK-ARM8-NEXT:  .LBB17_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB17_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB17_1
+; CHECK-ARM8-NEXT:    b .LBB17_5
+; CHECK-ARM8-NEXT:  .LBB17_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_xor_i16:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI17_0
+; CHECK-ARM6-NEXT:    ldrh r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB17_1
+; CHECK-ARM6-NEXT:  .LBB17_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB17_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    eor r12, r1, #1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI17_0
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:  .LBB17_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB17_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB17_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB17_2 Depth=2
+; CHECK-ARM6-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB17_2
+; CHECK-ARM6-NEXT:  .LBB17_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB17_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB17_1
+; CHECK-ARM6-NEXT:    b .LBB17_5
+; CHECK-ARM6-NEXT:  .LBB17_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI17_0:
+; CHECK-ARM6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB7-LABEL: test_xor_i16:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB17_1
+; CHECK-THUMB7-NEXT:  .LBB17_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB17_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    eor r12, r1, #1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:  .LBB17_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB17_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB17_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB17_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexh r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB17_2
+; CHECK-THUMB7-NEXT:  .LBB17_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB17_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB17_1
+; CHECK-THUMB7-NEXT:    b .LBB17_5
+; CHECK-THUMB7-NEXT:  .LBB17_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_xor_i16:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI17_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_xor_2
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI17_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB8BASE-LABEL: test_xor_i16:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB17_1
+; CHECK-THUMB8BASE-NEXT:  .LBB17_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB17_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movs r0, #1
+; CHECK-THUMB8BASE-NEXT:    mov r4, r1
+; CHECK-THUMB8BASE-NEXT:    eors r4, r0
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB17_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB17_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB17_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB17_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexh r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB17_2
+; CHECK-THUMB8BASE-NEXT:  .LBB17_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB17_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB17_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB17_5
+; CHECK-THUMB8BASE-NEXT:  .LBB17_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw xor i16* @atomic_i16, i16 1 monotonic
   ret i16 %0
 }
 define i16 @test_max_i16() {
-; COMMON-LABEL: test_max_i16:
-; EXPAND32: ldrexh
-; EXPAND32-NOT: str
-; EXPAND32: strexh
-; THUMB1: bl __sync_fetch_and_max_2
+; CHECK-ARM8-LABEL: test_max_i16:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    ldrh r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB18_1
+; CHECK-ARM8-NEXT:  .LBB18_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB18_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    sxth r0, r1
+; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    cmp r0, #1
+; CHECK-ARM8-NEXT:    movgt r12, r1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:  .LBB18_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB18_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB18_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB18_2 Depth=2
+; CHECK-ARM8-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB18_2
+; CHECK-ARM8-NEXT:  .LBB18_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB18_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB18_1
+; CHECK-ARM8-NEXT:    b .LBB18_5
+; CHECK-ARM8-NEXT:  .LBB18_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_max_i16:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI18_0
+; CHECK-ARM6-NEXT:    ldrh r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB18_1
+; CHECK-ARM6-NEXT:  .LBB18_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB18_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    sxth r0, r1
+; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    cmp r0, #1
+; CHECK-ARM6-NEXT:    movgt r12, r1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI18_0
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:  .LBB18_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB18_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB18_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB18_2 Depth=2
+; CHECK-ARM6-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB18_2
+; CHECK-ARM6-NEXT:  .LBB18_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB18_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB18_1
+; CHECK-ARM6-NEXT:    b .LBB18_5
+; CHECK-ARM6-NEXT:  .LBB18_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI18_0:
+; CHECK-ARM6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB7-LABEL: test_max_i16:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB18_1
+; CHECK-THUMB7-NEXT:  .LBB18_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB18_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    sxth r0, r1
+; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r0, #1
+; CHECK-THUMB7-NEXT:    it gt
+; CHECK-THUMB7-NEXT:    movgt r12, r1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:  .LBB18_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB18_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB18_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB18_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexh r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB18_2
+; CHECK-THUMB7-NEXT:  .LBB18_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB18_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB18_1
+; CHECK-THUMB7-NEXT:    b .LBB18_5
+; CHECK-THUMB7-NEXT:  .LBB18_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_max_i16:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI18_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_max_2
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI18_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB8BASE-LABEL: test_max_i16:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #20
+; CHECK-THUMB8BASE-NEXT:    sub sp, #20
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB18_1
+; CHECK-THUMB8BASE-NEXT:  .LBB18_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB18_4 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    sxth r1, r0
+; CHECK-THUMB8BASE-NEXT:    movs r2, #1
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bgt .LBB18_3
+; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB18_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB18_3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB18_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB18_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB18_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB18_6
+; CHECK-THUMB8BASE-NEXT:  @ %bb.5: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB18_4 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexh r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB18_4
+; CHECK-THUMB8BASE-NEXT:  .LBB18_6: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB18_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB18_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB18_7
+; CHECK-THUMB8BASE-NEXT:  .LBB18_7: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #20
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw max i16* @atomic_i16, i16 1 monotonic
   ret i16 %0
 }
 define i16 @test_min_i16() {
-; COMMON-LABEL: test_min_i16:
-; EXPAND32: ldrexh
-; EXPAND32-NOT: str
-; EXPAND32: strexh
-; THUMB1: bl __sync_fetch_and_min_2
+; CHECK-ARM8-LABEL: test_min_i16:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    ldrh r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB19_1
+; CHECK-ARM8-NEXT:  .LBB19_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB19_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    sxth r0, r1
+; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    cmp r0, #2
+; CHECK-ARM8-NEXT:    movlt r12, r1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:  .LBB19_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB19_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB19_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB19_2 Depth=2
+; CHECK-ARM8-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB19_2
+; CHECK-ARM8-NEXT:  .LBB19_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB19_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    uxth r1, r1
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB19_1
+; CHECK-ARM8-NEXT:    b .LBB19_5
+; CHECK-ARM8-NEXT:  .LBB19_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_min_i16:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI19_0
+; CHECK-ARM6-NEXT:    ldrh r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB19_1
+; CHECK-ARM6-NEXT:  .LBB19_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB19_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    sxth r0, r1
+; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    cmp r0, #2
+; CHECK-ARM6-NEXT:    movlt r12, r1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI19_0
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:  .LBB19_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB19_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB19_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB19_2 Depth=2
+; CHECK-ARM6-NEXT:    strexh r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB19_2
+; CHECK-ARM6-NEXT:  .LBB19_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB19_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    uxth r1, r1
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB19_1
+; CHECK-ARM6-NEXT:    b .LBB19_5
+; CHECK-ARM6-NEXT:  .LBB19_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI19_0:
+; CHECK-ARM6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB7-LABEL: test_min_i16:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB19_1
+; CHECK-THUMB7-NEXT:  .LBB19_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB19_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    sxth r0, r1
+; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r0, #2
+; CHECK-THUMB7-NEXT:    it lt
+; CHECK-THUMB7-NEXT:    movlt r12, r1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:  .LBB19_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB19_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB19_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB19_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexh r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB19_2
+; CHECK-THUMB7-NEXT:  .LBB19_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB19_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    uxth r1, r1
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB19_1
+; CHECK-THUMB7-NEXT:    b .LBB19_5
+; CHECK-THUMB7-NEXT:  .LBB19_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_min_i16:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI19_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_min_2
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI19_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB8BASE-LABEL: test_min_i16:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #20
+; CHECK-THUMB8BASE-NEXT:    sub sp, #20
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB19_1
+; CHECK-THUMB8BASE-NEXT:  .LBB19_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB19_4 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    sxth r1, r0
+; CHECK-THUMB8BASE-NEXT:    movs r2, #1
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #2
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    blt .LBB19_3
+; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB19_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB19_3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB19_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:  .LBB19_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB19_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB19_6
+; CHECK-THUMB8BASE-NEXT:  @ %bb.5: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB19_4 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexh r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB19_4
+; CHECK-THUMB8BASE-NEXT:  .LBB19_6: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB19_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r1
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB19_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB19_7
+; CHECK-THUMB8BASE-NEXT:  .LBB19_7: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #20
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw min i16* @atomic_i16, i16 1 monotonic
   ret i16 %0
 }
 define i16 @test_umax_i16() {
-; COMMON-LABEL: test_umax_i16:
-; EXPAND32: ldrexh
-; EXPAND32-NOT: str
-; EXPAND32: strexh
-; THUMB1: bl __sync_fetch_and_umax_2
+; CHECK-ARM8-LABEL: test_umax_i16:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .save {r11, lr}
+; CHECK-ARM8-NEXT:    push {r11, lr}
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    ldrh r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB20_1
+; CHECK-ARM8-NEXT:  .LBB20_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB20_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    uxth r1, r12
+; CHECK-ARM8-NEXT:    mov lr, #1
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    movhi lr, r12
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    uxth r12, r12
+; CHECK-ARM8-NEXT:  .LBB20_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB20_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r12
+; CHECK-ARM8-NEXT:    bne .LBB20_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB20_2 Depth=2
+; CHECK-ARM8-NEXT:    strexh r2, lr, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB20_2
+; CHECK-ARM8-NEXT:  .LBB20_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB20_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB20_1
+; CHECK-ARM8-NEXT:    b .LBB20_5
+; CHECK-ARM8-NEXT:  .LBB20_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    pop {r11, pc}
+;
+; CHECK-ARM6-LABEL: test_umax_i16:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .save {r11, lr}
+; CHECK-ARM6-NEXT:    push {r11, lr}
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI20_0
+; CHECK-ARM6-NEXT:    ldrh r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB20_1
+; CHECK-ARM6-NEXT:  .LBB20_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB20_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    uxth r1, r12
+; CHECK-ARM6-NEXT:    mov lr, #1
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    movhi lr, r12
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI20_0
+; CHECK-ARM6-NEXT:    uxth r12, r12
+; CHECK-ARM6-NEXT:  .LBB20_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB20_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r12
+; CHECK-ARM6-NEXT:    bne .LBB20_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB20_2 Depth=2
+; CHECK-ARM6-NEXT:    strexh r2, lr, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB20_2
+; CHECK-ARM6-NEXT:  .LBB20_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB20_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB20_1
+; CHECK-ARM6-NEXT:    b .LBB20_5
+; CHECK-ARM6-NEXT:  .LBB20_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    pop {r11, pc}
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI20_0:
+; CHECK-ARM6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB7-LABEL: test_umax_i16:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .save {r4, lr}
+; CHECK-THUMB7-NEXT:    push {r4, lr}
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB20_1
+; CHECK-THUMB7-NEXT:  .LBB20_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB20_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    uxth r1, r4
+; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    it hi
+; CHECK-THUMB7-NEXT:    movhi r12, r4
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    uxth r4, r4
+; CHECK-THUMB7-NEXT:  .LBB20_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB20_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r4
+; CHECK-THUMB7-NEXT:    bne .LBB20_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB20_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexh r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB20_2
+; CHECK-THUMB7-NEXT:  .LBB20_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB20_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB20_1
+; CHECK-THUMB7-NEXT:    b .LBB20_5
+; CHECK-THUMB7-NEXT:  .LBB20_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    pop {r4, pc}
+;
+; CHECK-THUMB6-LABEL: test_umax_i16:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI20_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_umax_2
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI20_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i16
+;
+; CHECK-THUMB8BASE-LABEL: test_umax_i16:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, r5, r7, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #24
+; CHECK-THUMB8BASE-NEXT:    sub sp, #24
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB20_1
+; CHECK-THUMB8BASE-NEXT:  .LBB20_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB20_4 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r0
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    movs r2, #1
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bhi .LBB20_3
+; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB20_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB20_3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB20_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r5, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    uxth r4, r4
+; CHECK-THUMB8BASE-NEXT:  .LBB20_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB20_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r4
+; CHECK-THUMB8BASE-NEXT:    bne .LBB20_6
+; CHECK-THUMB8BASE-NEXT:  @ %bb.5: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB20_4 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexh r2, r5, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB20_4
+; CHECK-THUMB8BASE-NEXT:  .LBB20_6: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB20_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB20_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB20_7
+; CHECK-THUMB8BASE-NEXT:  .LBB20_7: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #24
+; CHECK-THUMB8BASE-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %0 = atomicrmw umax i16* @atomic_i16, i16 1 monotonic
   ret i16 %0
 }
 define i16 @test_umin_i16() {
-; COMMON-LABEL: test_umin_i16:
-; EXPAND32: ldrexh
-; EXPAND32-NOT: str
-; EXPAND32: strexh
-; THUMB1: bl __sync_fetch_and_umin_2
+; CHECK-ARM8-LABEL: test_umin_i16:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .save {r11, lr}
+; CHECK-ARM8-NEXT:    push {r11, lr}
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    ldrh r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB21_1
+; CHECK-ARM8-NEXT:  .LBB21_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB21_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    uxth r1, r12
+; CHECK-ARM8-NEXT:    mov lr, #1
+; CHECK-ARM8-NEXT:    cmp r1, #2
+; CHECK-ARM8-NEXT:    movlo lr, r12
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-ARM8-NEXT:    uxth r12, r12
+; CHECK-ARM8-NEXT:  .LBB21_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB21_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r12
+; CHECK-ARM8-NEXT:    bne .LBB21_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB21_2 Depth=2
+; CHECK-ARM8-NEXT:    strexh r2, lr, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB21_2
+; CHECK-ARM8-NEXT:  .LBB21_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB21_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB21_1
+; CHECK-ARM8-NEXT:    b .LBB21_5
+; CHECK-ARM8-NEXT:  .LBB21_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    pop {r11, pc}
+; ARM6 expectations: same nested loop as the ARM8 group, but the address of atomic_i16 is loaded from the literal pool entry .LCPI21_0 instead of a movw/movt pair.
+; CHECK-ARM6-LABEL: test_umin_i16:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .save {r11, lr}
+; CHECK-ARM6-NEXT:    push {r11, lr}
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI21_0
+; CHECK-ARM6-NEXT:    ldrh r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB21_1
+; CHECK-ARM6-NEXT:  .LBB21_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB21_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r12, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    uxth r1, r12
+; CHECK-ARM6-NEXT:    mov lr, #1
+; CHECK-ARM6-NEXT:    cmp r1, #2
+; CHECK-ARM6-NEXT:    movlo lr, r12
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI21_0
+; CHECK-ARM6-NEXT:    uxth r12, r12
+; CHECK-ARM6-NEXT:  .LBB21_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB21_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexh r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r12
+; CHECK-ARM6-NEXT:    bne .LBB21_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB21_2 Depth=2
+; CHECK-ARM6-NEXT:    strexh r2, lr, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB21_2
+; CHECK-ARM6-NEXT:  .LBB21_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB21_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB21_1
+; CHECK-ARM6-NEXT:    b .LBB21_5
+; CHECK-ARM6-NEXT:  .LBB21_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    pop {r11, pc}
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI21_0:
+; CHECK-ARM6-NEXT:    .long atomic_i16
+; THUMB7 expectations: the umin select uses an explicit IT block (it lo / movlo) and the value to store is held in r12; r4 holds the expected old value, so r4 is saved with lr.
+; CHECK-THUMB7-LABEL: test_umin_i16:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .save {r4, lr}
+; CHECK-THUMB7-NEXT:    push {r4, lr}
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB21_1
+; CHECK-THUMB7-NEXT:  .LBB21_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB21_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    uxth r1, r4
+; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r1, #2
+; CHECK-THUMB7-NEXT:    it lo
+; CHECK-THUMB7-NEXT:    movlo r12, r4
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB7-NEXT:    uxth r4, r4
+; CHECK-THUMB7-NEXT:  .LBB21_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB21_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r4
+; CHECK-THUMB7-NEXT:    bne .LBB21_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB21_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexh r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB21_2
+; CHECK-THUMB7-NEXT:  .LBB21_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB21_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB21_1
+; CHECK-THUMB7-NEXT:    b .LBB21_5
+; CHECK-THUMB7-NEXT:  .LBB21_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    pop {r4, pc}
+; THUMB6 expectations: no ldrexh/strexh loop is emitted; the operation is a call to __sync_fetch_and_umin_2 with the address (from .LCPI21_0) in r0 and the operand 1 in r1.
+; CHECK-THUMB6-LABEL: test_umin_i16:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI21_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_umin_2
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI21_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i16
+; THUMB8BASE expectations: the umin select is a branch (blo) around stack spills, all placed before the ldrexh; no store appears between ldrexh and strexh, and equality is computed with subs/rsbs/adcs.
+; CHECK-THUMB8BASE-LABEL: test_umin_i16:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, r5, r7, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #24
+; CHECK-THUMB8BASE-NEXT:    sub sp, #24
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    ldrh r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB21_1
+; CHECK-THUMB8BASE-NEXT:  .LBB21_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB21_4 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    uxth r1, r0
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    movs r2, #1
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #2
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    blo .LBB21_3
+; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB21_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB21_3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB21_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r5, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i16
+; CHECK-THUMB8BASE-NEXT:    uxth r4, r4
+; CHECK-THUMB8BASE-NEXT:  .LBB21_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB21_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrexh r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r4
+; CHECK-THUMB8BASE-NEXT:    bne .LBB21_6
+; CHECK-THUMB8BASE-NEXT:  @ %bb.5: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB21_4 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strexh r2, r5, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB21_4
+; CHECK-THUMB8BASE-NEXT:  .LBB21_6: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB21_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB21_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB21_7
+; CHECK-THUMB8BASE-NEXT:  .LBB21_7: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #24
+; CHECK-THUMB8BASE-NEXT:    pop {r4, r5, r7, pc}
 entry:
   %0 = atomicrmw umin i16* @atomic_i16, i16 1 monotonic
   ret i16 %0
@@ -238,235 +4589,4861 @@ entry:
 
 
 define i32 @test_xchg_i32() {
-; COMMON-LABEL: test_xchg_i32:
-; EXPAND32: ldrex
-; EXPAND32-NOT: str
-; EXPAND32: strex
-; THUMB1: bl __sync_lock_test_and_set_4
+; CHECK-ARM8-LABEL: test_xchg_i32:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:    ldr r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB22_1
+; CHECK-ARM8-NEXT:  .LBB22_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB22_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:  .LBB22_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB22_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrex r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB22_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB22_2 Depth=2
+; CHECK-ARM8-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB22_2
+; CHECK-ARM8-NEXT:  .LBB22_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB22_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB22_1
+; CHECK-ARM8-NEXT:    b .LBB22_5
+; CHECK-ARM8-NEXT:  .LBB22_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+; ARM6 expectations: same nested loop as the ARM8 group, but the address of atomic_i32 is loaded from the literal pool entry .LCPI22_0 instead of a movw/movt pair.
+; CHECK-ARM6-LABEL: test_xchg_i32:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI22_0
+; CHECK-ARM6-NEXT:    ldr r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB22_1
+; CHECK-ARM6-NEXT:  .LBB22_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB22_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI22_0
+; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:  .LBB22_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB22_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrex r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB22_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB22_2 Depth=2
+; CHECK-ARM6-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB22_2
+; CHECK-ARM6-NEXT:  .LBB22_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB22_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB22_1
+; CHECK-ARM6-NEXT:    b .LBB22_5
+; CHECK-ARM6-NEXT:  .LBB22_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI22_0:
+; CHECK-ARM6-NEXT:    .long atomic_i32
+; THUMB7 expectations: same structure as the ARM8 group; the replacement value 1 is materialised in r12 with mov.w before the ldrex/strex loop.
+; CHECK-THUMB7-LABEL: test_xchg_i32:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:    ldr r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB22_1
+; CHECK-THUMB7-NEXT:  .LBB22_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB22_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:  .LBB22_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB22_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB22_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB22_2 Depth=2
+; CHECK-THUMB7-NEXT:    strex r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB22_2
+; CHECK-THUMB7-NEXT:  .LBB22_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB22_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB22_1
+; CHECK-THUMB7-NEXT:    b .LBB22_5
+; CHECK-THUMB7-NEXT:  .LBB22_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+; THUMB6 expectations: no ldrex/strex loop is emitted; the exchange is a call to __sync_lock_test_and_set_4 with the address (from .LCPI22_0) in r0 and the value 1 in r1.
+; CHECK-THUMB6-LABEL: test_xchg_i32:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI22_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_lock_test_and_set_4
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI22_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i32
+; THUMB8BASE expectations: the value 1 is kept in r4, so r4 is saved and restored with lr/pc; equality after the loop is computed with subs/rsbs/adcs rather than clz/lsr.
+; CHECK-THUMB8BASE-LABEL: test_xchg_i32:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB22_1
+; CHECK-THUMB8BASE-NEXT:  .LBB22_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB22_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movs r4, #1
+; CHECK-THUMB8BASE-NEXT:  .LBB22_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB22_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB22_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB22_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strex r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB22_2
+; CHECK-THUMB8BASE-NEXT:  .LBB22_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB22_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB22_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB22_5
+; CHECK-THUMB8BASE-NEXT:  .LBB22_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw xchg i32* @atomic_i32, i32 1 monotonic
   ret i32 %0
 }
 define i32 @test_add_i32() {
-; COMMON-LABEL: test_add_i32:
-; EXPAND32: ldrex
-; EXPAND32-NOT: str
-; EXPAND32: strex
-; THUMB1: bl __sync_fetch_and_add_4
+; CHECK-ARM8-LABEL: test_add_i32:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:    ldr r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB23_1
+; CHECK-ARM8-NEXT:  .LBB23_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB23_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add r12, r1, #1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:  .LBB23_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB23_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrex r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB23_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB23_2 Depth=2
+; CHECK-ARM8-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB23_2
+; CHECK-ARM8-NEXT:  .LBB23_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB23_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB23_1
+; CHECK-ARM8-NEXT:    b .LBB23_5
+; CHECK-ARM8-NEXT:  .LBB23_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+; ARM6 expectations: same nested loop as the ARM8 group, but the address of atomic_i32 is loaded from the literal pool entry .LCPI23_0 instead of a movw/movt pair.
+; CHECK-ARM6-LABEL: test_add_i32:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI23_0
+; CHECK-ARM6-NEXT:    ldr r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB23_1
+; CHECK-ARM6-NEXT:  .LBB23_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB23_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add r12, r1, #1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI23_0
+; CHECK-ARM6-NEXT:  .LBB23_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB23_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrex r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB23_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB23_2 Depth=2
+; CHECK-ARM6-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB23_2
+; CHECK-ARM6-NEXT:  .LBB23_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB23_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB23_1
+; CHECK-ARM6-NEXT:    b .LBB23_5
+; CHECK-ARM6-NEXT:  .LBB23_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI23_0:
+; CHECK-ARM6-NEXT:    .long atomic_i32
+; THUMB7 expectations: same structure as the ARM8 group; the incremented value is computed into r12 with add.w before the ldrex/strex loop.
+; CHECK-THUMB7-LABEL: test_add_i32:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:    ldr r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB23_1
+; CHECK-THUMB7-NEXT:  .LBB23_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB23_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add.w r12, r1, #1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:  .LBB23_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB23_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB23_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB23_2 Depth=2
+; CHECK-THUMB7-NEXT:    strex r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB23_2
+; CHECK-THUMB7-NEXT:  .LBB23_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB23_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB23_1
+; CHECK-THUMB7-NEXT:    b .LBB23_5
+; CHECK-THUMB7-NEXT:  .LBB23_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+; THUMB6 expectations: no ldrex/strex loop is emitted; the addition is a call to __sync_fetch_and_add_4 with the address (from .LCPI23_0) in r0 and the operand 1 in r1.
+; CHECK-THUMB6-LABEL: test_add_i32:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI23_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_add_4
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI23_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i32
+; THUMB8BASE expectations: the incremented value is computed into r4 with adds, so r4 is saved and restored with lr/pc; equality after the loop is computed with subs/rsbs/adcs rather than clz/lsr.
+; CHECK-THUMB8BASE-LABEL: test_add_i32:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB23_1
+; CHECK-THUMB8BASE-NEXT:  .LBB23_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB23_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    adds r4, r1, #1
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:  .LBB23_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB23_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB23_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB23_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strex r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB23_2
+; CHECK-THUMB8BASE-NEXT:  .LBB23_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB23_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB23_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB23_5
+; CHECK-THUMB8BASE-NEXT:  .LBB23_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw add i32* @atomic_i32, i32 1 monotonic
   ret i32 %0
 }
 define i32 @test_sub_i32() {
-; COMMON-LABEL: test_sub_i32:
-; EXPAND32: ldrex
-; EXPAND32-NOT: str
-; EXPAND32: strex
-; THUMB1: bl __sync_fetch_and_sub_4
+; CHECK-ARM8-LABEL: test_sub_i32:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:    ldr r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB24_1
+; CHECK-ARM8-NEXT:  .LBB24_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB24_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    sub r12, r1, #1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:  .LBB24_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB24_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrex r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB24_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB24_2 Depth=2
+; CHECK-ARM8-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB24_2
+; CHECK-ARM8-NEXT:  .LBB24_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB24_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB24_1
+; CHECK-ARM8-NEXT:    b .LBB24_5
+; CHECK-ARM8-NEXT:  .LBB24_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_sub_i32:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI24_0
+; CHECK-ARM6-NEXT:    ldr r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB24_1
+; CHECK-ARM6-NEXT:  .LBB24_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB24_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    sub r12, r1, #1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI24_0
+; CHECK-ARM6-NEXT:  .LBB24_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB24_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrex r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB24_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB24_2 Depth=2
+; CHECK-ARM6-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB24_2
+; CHECK-ARM6-NEXT:  .LBB24_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB24_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB24_1
+; CHECK-ARM6-NEXT:    b .LBB24_5
+; CHECK-ARM6-NEXT:  .LBB24_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI24_0:
+; CHECK-ARM6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB7-LABEL: test_sub_i32:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:    ldr r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB24_1
+; CHECK-THUMB7-NEXT:  .LBB24_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB24_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    sub.w r12, r1, #1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:  .LBB24_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB24_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB24_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB24_2 Depth=2
+; CHECK-THUMB7-NEXT:    strex r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB24_2
+; CHECK-THUMB7-NEXT:  .LBB24_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB24_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB24_1
+; CHECK-THUMB7-NEXT:    b .LBB24_5
+; CHECK-THUMB7-NEXT:  .LBB24_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_sub_i32:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI24_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_sub_4
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI24_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB8BASE-LABEL: test_sub_i32:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB24_1
+; CHECK-THUMB8BASE-NEXT:  .LBB24_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB24_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    subs r4, r1, #1
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:  .LBB24_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB24_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB24_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB24_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strex r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB24_2
+; CHECK-THUMB8BASE-NEXT:  .LBB24_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB24_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB24_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB24_5
+; CHECK-THUMB8BASE-NEXT:  .LBB24_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw sub i32* @atomic_i32, i32 1 monotonic
   ret i32 %0
 }
 define i32 @test_and_i32() {
-; COMMON-LABEL: test_and_i32:
-; EXPAND32: ldrex
-; EXPAND32-NOT: str
-; EXPAND32: strex
-; THUMB1: bl __sync_fetch_and_and_4
+; CHECK-ARM8-LABEL: test_and_i32:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:    ldr r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB25_1
+; CHECK-ARM8-NEXT:  .LBB25_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB25_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    and r12, r1, #1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:  .LBB25_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB25_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrex r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB25_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB25_2 Depth=2
+; CHECK-ARM8-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB25_2
+; CHECK-ARM8-NEXT:  .LBB25_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB25_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB25_1
+; CHECK-ARM8-NEXT:    b .LBB25_5
+; CHECK-ARM8-NEXT:  .LBB25_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_and_i32:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI25_0
+; CHECK-ARM6-NEXT:    ldr r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB25_1
+; CHECK-ARM6-NEXT:  .LBB25_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB25_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    and r12, r1, #1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI25_0
+; CHECK-ARM6-NEXT:  .LBB25_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB25_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrex r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB25_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB25_2 Depth=2
+; CHECK-ARM6-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB25_2
+; CHECK-ARM6-NEXT:  .LBB25_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB25_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB25_1
+; CHECK-ARM6-NEXT:    b .LBB25_5
+; CHECK-ARM6-NEXT:  .LBB25_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI25_0:
+; CHECK-ARM6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB7-LABEL: test_and_i32:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:    ldr r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB25_1
+; CHECK-THUMB7-NEXT:  .LBB25_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB25_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    and r12, r1, #1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:  .LBB25_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB25_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB25_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB25_2 Depth=2
+; CHECK-THUMB7-NEXT:    strex r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB25_2
+; CHECK-THUMB7-NEXT:  .LBB25_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB25_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB25_1
+; CHECK-THUMB7-NEXT:    b .LBB25_5
+; CHECK-THUMB7-NEXT:  .LBB25_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_and_i32:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI25_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_and_4
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI25_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB8BASE-LABEL: test_and_i32:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB25_1
+; CHECK-THUMB8BASE-NEXT:  .LBB25_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB25_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movs r0, #1
+; CHECK-THUMB8BASE-NEXT:    mov r4, r1
+; CHECK-THUMB8BASE-NEXT:    ands r4, r0
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:  .LBB25_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB25_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB25_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB25_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strex r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB25_2
+; CHECK-THUMB8BASE-NEXT:  .LBB25_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB25_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB25_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB25_5
+; CHECK-THUMB8BASE-NEXT:  .LBB25_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw and i32* @atomic_i32, i32 1 monotonic
   ret i32 %0
 }
 define i32 @test_nand_i32() {
-; COMMON-LABEL: test_nand_i32:
-; EXPAND32: ldrex
-; EXPAND32-NOT: str
-; EXPAND32: strex
-; THUMB1: bl __sync_fetch_and_nand_4
+; CHECK-ARM8-LABEL: test_nand_i32:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:    ldr r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB26_1
+; CHECK-ARM8-NEXT:  .LBB26_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB26_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    mvn r0, r1
+; CHECK-ARM8-NEXT:    mvn r2, #1
+; CHECK-ARM8-NEXT:    orr r12, r0, r2
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:  .LBB26_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB26_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrex r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB26_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB26_2 Depth=2
+; CHECK-ARM8-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB26_2
+; CHECK-ARM8-NEXT:  .LBB26_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB26_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB26_1
+; CHECK-ARM8-NEXT:    b .LBB26_5
+; CHECK-ARM8-NEXT:  .LBB26_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_nand_i32:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI26_0
+; CHECK-ARM6-NEXT:    ldr r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB26_1
+; CHECK-ARM6-NEXT:  .LBB26_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB26_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    mvn r0, r1
+; CHECK-ARM6-NEXT:    mvn r2, #1
+; CHECK-ARM6-NEXT:    orr r12, r0, r2
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI26_0
+; CHECK-ARM6-NEXT:  .LBB26_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB26_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrex r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB26_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB26_2 Depth=2
+; CHECK-ARM6-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB26_2
+; CHECK-ARM6-NEXT:  .LBB26_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB26_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB26_1
+; CHECK-ARM6-NEXT:    b .LBB26_5
+; CHECK-ARM6-NEXT:  .LBB26_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI26_0:
+; CHECK-ARM6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB7-LABEL: test_nand_i32:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:    ldr r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB26_1
+; CHECK-THUMB7-NEXT:  .LBB26_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB26_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    mvn r0, #1
+; CHECK-THUMB7-NEXT:    orn r12, r0, r1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:  .LBB26_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB26_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB26_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB26_2 Depth=2
+; CHECK-THUMB7-NEXT:    strex r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB26_2
+; CHECK-THUMB7-NEXT:  .LBB26_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB26_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB26_1
+; CHECK-THUMB7-NEXT:    b .LBB26_5
+; CHECK-THUMB7-NEXT:  .LBB26_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_nand_i32:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI26_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_nand_4
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI26_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB8BASE-LABEL: test_nand_i32:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB26_1
+; CHECK-THUMB8BASE-NEXT:  .LBB26_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB26_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    mvns r4, r1
+; CHECK-THUMB8BASE-NEXT:    movs r0, #1
+; CHECK-THUMB8BASE-NEXT:    mvns r0, r0
+; CHECK-THUMB8BASE-NEXT:    orrs r4, r0
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:  .LBB26_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB26_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB26_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB26_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strex r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB26_2
+; CHECK-THUMB8BASE-NEXT:  .LBB26_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB26_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB26_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB26_5
+; CHECK-THUMB8BASE-NEXT:  .LBB26_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw nand i32* @atomic_i32, i32 1 monotonic
   ret i32 %0
 }
 define i32 @test_or_i32() {
-; COMMON-LABEL: test_or_i32:
-; EXPAND32: ldrex
-; EXPAND32-NOT: str
-; EXPAND32: strex
-; THUMB1: bl __sync_fetch_and_or_4
+; CHECK-ARM8-LABEL: test_or_i32:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:    ldr r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB27_1
+; CHECK-ARM8-NEXT:  .LBB27_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB27_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    orr r12, r1, #1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:  .LBB27_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB27_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrex r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB27_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB27_2 Depth=2
+; CHECK-ARM8-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB27_2
+; CHECK-ARM8-NEXT:  .LBB27_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB27_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB27_1
+; CHECK-ARM8-NEXT:    b .LBB27_5
+; CHECK-ARM8-NEXT:  .LBB27_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_or_i32:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI27_0
+; CHECK-ARM6-NEXT:    ldr r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB27_1
+; CHECK-ARM6-NEXT:  .LBB27_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB27_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    orr r12, r1, #1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI27_0
+; CHECK-ARM6-NEXT:  .LBB27_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB27_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrex r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB27_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB27_2 Depth=2
+; CHECK-ARM6-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB27_2
+; CHECK-ARM6-NEXT:  .LBB27_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB27_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB27_1
+; CHECK-ARM6-NEXT:    b .LBB27_5
+; CHECK-ARM6-NEXT:  .LBB27_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI27_0:
+; CHECK-ARM6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB7-LABEL: test_or_i32:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:    ldr r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB27_1
+; CHECK-THUMB7-NEXT:  .LBB27_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB27_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    orr r12, r1, #1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:  .LBB27_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB27_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB27_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB27_2 Depth=2
+; CHECK-THUMB7-NEXT:    strex r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB27_2
+; CHECK-THUMB7-NEXT:  .LBB27_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB27_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB27_1
+; CHECK-THUMB7-NEXT:    b .LBB27_5
+; CHECK-THUMB7-NEXT:  .LBB27_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_or_i32:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI27_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_or_4
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI27_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB8BASE-LABEL: test_or_i32:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB27_1
+; CHECK-THUMB8BASE-NEXT:  .LBB27_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB27_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movs r0, #1
+; CHECK-THUMB8BASE-NEXT:    mov r4, r1
+; CHECK-THUMB8BASE-NEXT:    orrs r4, r0
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:  .LBB27_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB27_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB27_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB27_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strex r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB27_2
+; CHECK-THUMB8BASE-NEXT:  .LBB27_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB27_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB27_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB27_5
+; CHECK-THUMB8BASE-NEXT:  .LBB27_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw or i32* @atomic_i32, i32 1 monotonic
   ret i32 %0
 }
 define i32 @test_xor_i32() {
-; COMMON-LABEL: test_xor_i32:
-; EXPAND32: ldrex
-; EXPAND32-NOT: str
-; EXPAND32: strex
-; THUMB1: bl __sync_fetch_and_xor_4
+; CHECK-ARM8-LABEL: test_xor_i32:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:    ldr r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB28_1
+; CHECK-ARM8-NEXT:  .LBB28_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB28_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    eor r12, r1, #1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:  .LBB28_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB28_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrex r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB28_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB28_2 Depth=2
+; CHECK-ARM8-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB28_2
+; CHECK-ARM8-NEXT:  .LBB28_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB28_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB28_1
+; CHECK-ARM8-NEXT:    b .LBB28_5
+; CHECK-ARM8-NEXT:  .LBB28_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_xor_i32:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI28_0
+; CHECK-ARM6-NEXT:    ldr r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB28_1
+; CHECK-ARM6-NEXT:  .LBB28_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB28_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    eor r12, r1, #1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI28_0
+; CHECK-ARM6-NEXT:  .LBB28_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB28_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrex r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB28_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB28_2 Depth=2
+; CHECK-ARM6-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB28_2
+; CHECK-ARM6-NEXT:  .LBB28_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB28_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB28_1
+; CHECK-ARM6-NEXT:    b .LBB28_5
+; CHECK-ARM6-NEXT:  .LBB28_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI28_0:
+; CHECK-ARM6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB7-LABEL: test_xor_i32:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:    ldr r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB28_1
+; CHECK-THUMB7-NEXT:  .LBB28_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB28_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    eor r12, r1, #1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:  .LBB28_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB28_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB28_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB28_2 Depth=2
+; CHECK-THUMB7-NEXT:    strex r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB28_2
+; CHECK-THUMB7-NEXT:  .LBB28_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB28_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB28_1
+; CHECK-THUMB7-NEXT:    b .LBB28_5
+; CHECK-THUMB7-NEXT:  .LBB28_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_xor_i32:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI28_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_xor_4
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI28_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB8BASE-LABEL: test_xor_i32:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB28_1
+; CHECK-THUMB8BASE-NEXT:  .LBB28_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB28_2 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movs r0, #1
+; CHECK-THUMB8BASE-NEXT:    mov r4, r1
+; CHECK-THUMB8BASE-NEXT:    eors r4, r0
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:  .LBB28_2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB28_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB28_4
+; CHECK-THUMB8BASE-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB28_2 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strex r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB28_2
+; CHECK-THUMB8BASE-NEXT:  .LBB28_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB28_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB28_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB28_5
+; CHECK-THUMB8BASE-NEXT:  .LBB28_5: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw xor i32* @atomic_i32, i32 1 monotonic
   ret i32 %0
 }
 define i32 @test_max_i32() {
-; COMMON-LABEL: test_max_i32:
-; EXPAND32: ldrex
-; EXPAND32-NOT: str
-; EXPAND32: strex
-; THUMB1: bl __sync_fetch_and_max_4
+; CHECK-ARM8-LABEL: test_max_i32:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:    ldr r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB29_1
+; CHECK-ARM8-NEXT:  .LBB29_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB29_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    movgt r12, r1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:  .LBB29_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB29_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrex r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB29_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB29_2 Depth=2
+; CHECK-ARM8-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB29_2
+; CHECK-ARM8-NEXT:  .LBB29_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB29_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB29_1
+; CHECK-ARM8-NEXT:    b .LBB29_5
+; CHECK-ARM8-NEXT:  .LBB29_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_max_i32:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI29_0
+; CHECK-ARM6-NEXT:    ldr r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB29_1
+; CHECK-ARM6-NEXT:  .LBB29_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB29_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    movgt r12, r1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI29_0
+; CHECK-ARM6-NEXT:  .LBB29_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB29_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrex r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB29_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB29_2 Depth=2
+; CHECK-ARM6-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB29_2
+; CHECK-ARM6-NEXT:  .LBB29_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB29_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB29_1
+; CHECK-ARM6-NEXT:    b .LBB29_5
+; CHECK-ARM6-NEXT:  .LBB29_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI29_0:
+; CHECK-ARM6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB7-LABEL: test_max_i32:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:    ldr r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB29_1
+; CHECK-THUMB7-NEXT:  .LBB29_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB29_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    it gt
+; CHECK-THUMB7-NEXT:    movgt r12, r1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:  .LBB29_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB29_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB29_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB29_2 Depth=2
+; CHECK-THUMB7-NEXT:    strex r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB29_2
+; CHECK-THUMB7-NEXT:  .LBB29_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB29_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB29_1
+; CHECK-THUMB7-NEXT:    b .LBB29_5
+; CHECK-THUMB7-NEXT:  .LBB29_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_max_i32:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI29_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_max_4
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI29_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB8BASE-LABEL: test_max_i32:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #20
+; CHECK-THUMB8BASE-NEXT:    sub sp, #20
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB29_1
+; CHECK-THUMB8BASE-NEXT:  .LBB29_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB29_4 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    movs r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cmp r0, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bgt .LBB29_3
+; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB29_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB29_3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB29_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:  .LBB29_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB29_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB29_6
+; CHECK-THUMB8BASE-NEXT:  @ %bb.5: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB29_4 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strex r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB29_4
+; CHECK-THUMB8BASE-NEXT:  .LBB29_6: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB29_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB29_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB29_7
+; CHECK-THUMB8BASE-NEXT:  .LBB29_7: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #20
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw max i32* @atomic_i32, i32 1 monotonic
   ret i32 %0
 }
 define i32 @test_min_i32() {
-; COMMON-LABEL: test_min_i32:
-; EXPAND32: ldrex
-; EXPAND32-NOT: str
-; EXPAND32: strex
-
-; THUMB1: bl __sync_fetch_and_min_4
+; CHECK-ARM8-LABEL: test_min_i32:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:    ldr r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB30_1
+; CHECK-ARM8-NEXT:  .LBB30_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB30_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    cmp r1, #2
+; CHECK-ARM8-NEXT:    movlt r12, r1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:  .LBB30_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB30_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrex r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB30_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB30_2 Depth=2
+; CHECK-ARM8-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB30_2
+; CHECK-ARM8-NEXT:  .LBB30_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB30_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB30_1
+; CHECK-ARM8-NEXT:    b .LBB30_5
+; CHECK-ARM8-NEXT:  .LBB30_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_min_i32:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI30_0
+; CHECK-ARM6-NEXT:    ldr r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB30_1
+; CHECK-ARM6-NEXT:  .LBB30_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB30_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    cmp r1, #2
+; CHECK-ARM6-NEXT:    movlt r12, r1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI30_0
+; CHECK-ARM6-NEXT:  .LBB30_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB30_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrex r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB30_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB30_2 Depth=2
+; CHECK-ARM6-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB30_2
+; CHECK-ARM6-NEXT:  .LBB30_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB30_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB30_1
+; CHECK-ARM6-NEXT:    b .LBB30_5
+; CHECK-ARM6-NEXT:  .LBB30_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI30_0:
+; CHECK-ARM6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB7-LABEL: test_min_i32:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:    ldr r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB30_1
+; CHECK-THUMB7-NEXT:  .LBB30_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB30_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r1, #2
+; CHECK-THUMB7-NEXT:    it lt
+; CHECK-THUMB7-NEXT:    movlt r12, r1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:  .LBB30_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB30_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB30_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB30_2 Depth=2
+; CHECK-THUMB7-NEXT:    strex r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB30_2
+; CHECK-THUMB7-NEXT:  .LBB30_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB30_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB30_1
+; CHECK-THUMB7-NEXT:    b .LBB30_5
+; CHECK-THUMB7-NEXT:  .LBB30_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_min_i32:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI30_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_min_4
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI30_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB8BASE-LABEL: test_min_i32:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #20
+; CHECK-THUMB8BASE-NEXT:    sub sp, #20
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB30_1
+; CHECK-THUMB8BASE-NEXT:  .LBB30_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB30_4 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    movs r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cmp r0, #2
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    blt .LBB30_3
+; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB30_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB30_3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB30_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:  .LBB30_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB30_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB30_6
+; CHECK-THUMB8BASE-NEXT:  @ %bb.5: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB30_4 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strex r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB30_4
+; CHECK-THUMB8BASE-NEXT:  .LBB30_6: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB30_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB30_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB30_7
+; CHECK-THUMB8BASE-NEXT:  .LBB30_7: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #20
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw min i32* @atomic_i32, i32 1 monotonic
   ret i32 %0
 }
 define i32 @test_umax_i32() {
-; COMMON-LABEL: test_umax_i32:
-; EXPAND32: ldrex
-; EXPAND32-NOT: str
-; EXPAND32: strex
-; THUMB1: bl __sync_fetch_and_umax_4
+; CHECK-ARM8-LABEL: test_umax_i32:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:    ldr r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB31_1
+; CHECK-ARM8-NEXT:  .LBB31_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB31_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    movhi r12, r1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:  .LBB31_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB31_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrex r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB31_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB31_2 Depth=2
+; CHECK-ARM8-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB31_2
+; CHECK-ARM8-NEXT:  .LBB31_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB31_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB31_1
+; CHECK-ARM8-NEXT:    b .LBB31_5
+; CHECK-ARM8-NEXT:  .LBB31_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_umax_i32:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI31_0
+; CHECK-ARM6-NEXT:    ldr r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB31_1
+; CHECK-ARM6-NEXT:  .LBB31_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB31_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    movhi r12, r1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI31_0
+; CHECK-ARM6-NEXT:  .LBB31_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB31_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrex r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB31_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB31_2 Depth=2
+; CHECK-ARM6-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB31_2
+; CHECK-ARM6-NEXT:  .LBB31_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB31_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB31_1
+; CHECK-ARM6-NEXT:    b .LBB31_5
+; CHECK-ARM6-NEXT:  .LBB31_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI31_0:
+; CHECK-ARM6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB7-LABEL: test_umax_i32:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:    ldr r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB31_1
+; CHECK-THUMB7-NEXT:  .LBB31_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB31_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    it hi
+; CHECK-THUMB7-NEXT:    movhi r12, r1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:  .LBB31_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB31_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB31_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB31_2 Depth=2
+; CHECK-THUMB7-NEXT:    strex r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB31_2
+; CHECK-THUMB7-NEXT:  .LBB31_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB31_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB31_1
+; CHECK-THUMB7-NEXT:    b .LBB31_5
+; CHECK-THUMB7-NEXT:  .LBB31_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_umax_i32:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI31_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_umax_4
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI31_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB8BASE-LABEL: test_umax_i32:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #20
+; CHECK-THUMB8BASE-NEXT:    sub sp, #20
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB31_1
+; CHECK-THUMB8BASE-NEXT:  .LBB31_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB31_4 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    movs r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cmp r0, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bhi .LBB31_3
+; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB31_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB31_3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB31_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:  .LBB31_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB31_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB31_6
+; CHECK-THUMB8BASE-NEXT:  @ %bb.5: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB31_4 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strex r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB31_4
+; CHECK-THUMB8BASE-NEXT:  .LBB31_6: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB31_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB31_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB31_7
+; CHECK-THUMB8BASE-NEXT:  .LBB31_7: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #20
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw umax i32* @atomic_i32, i32 1 monotonic
   ret i32 %0
 }
 define i32 @test_umin_i32() {
-; COMMON-LABEL: test_umin_i32:
-; EXPAND32: ldrex
-; EXPAND32-NOT: str
-; EXPAND32: strex
-; THUMB1: bl __sync_fetch_and_umin_4
+; CHECK-ARM8-LABEL: test_umin_i32:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .pad #8
+; CHECK-ARM8-NEXT:    sub sp, sp, #8
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:    ldr r0, [r0]
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB32_1
+; CHECK-ARM8-NEXT:  .LBB32_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB32_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    mov r12, #1
+; CHECK-ARM8-NEXT:    cmp r1, #2
+; CHECK-ARM8-NEXT:    movlo r12, r1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-ARM8-NEXT:  .LBB32_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB32_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrex r0, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, r1
+; CHECK-ARM8-NEXT:    bne .LBB32_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB32_2 Depth=2
+; CHECK-ARM8-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    bne .LBB32_2
+; CHECK-ARM8-NEXT:  .LBB32_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB32_1 Depth=1
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    sub r1, r0, r1
+; CHECK-ARM8-NEXT:    clz r1, r1
+; CHECK-ARM8-NEXT:    lsr r1, r1, #5
+; CHECK-ARM8-NEXT:    cmp r1, #1
+; CHECK-ARM8-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB32_1
+; CHECK-ARM8-NEXT:    b .LBB32_5
+; CHECK-ARM8-NEXT:  .LBB32_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #8
+; CHECK-ARM8-NEXT:    bx lr
+;
+; CHECK-ARM6-LABEL: test_umin_i32:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .pad #8
+; CHECK-ARM6-NEXT:    sub sp, sp, #8
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI32_0
+; CHECK-ARM6-NEXT:    ldr r0, [r0]
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB32_1
+; CHECK-ARM6-NEXT:  .LBB32_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB32_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    mov r12, #1
+; CHECK-ARM6-NEXT:    cmp r1, #2
+; CHECK-ARM6-NEXT:    movlo r12, r1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI32_0
+; CHECK-ARM6-NEXT:  .LBB32_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB32_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrex r0, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, r1
+; CHECK-ARM6-NEXT:    bne .LBB32_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB32_2 Depth=2
+; CHECK-ARM6-NEXT:    strex r2, r12, [r3]
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    bne .LBB32_2
+; CHECK-ARM6-NEXT:  .LBB32_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB32_1 Depth=1
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    sub r1, r0, r1
+; CHECK-ARM6-NEXT:    clz r1, r1
+; CHECK-ARM6-NEXT:    lsr r1, r1, #5
+; CHECK-ARM6-NEXT:    cmp r1, #1
+; CHECK-ARM6-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB32_1
+; CHECK-ARM6-NEXT:    b .LBB32_5
+; CHECK-ARM6-NEXT:  .LBB32_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #8
+; CHECK-ARM6-NEXT:    bx lr
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI32_0:
+; CHECK-ARM6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB7-LABEL: test_umin_i32:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .pad #8
+; CHECK-THUMB7-NEXT:    sub sp, #8
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:    ldr r0, [r0]
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB32_1
+; CHECK-THUMB7-NEXT:  .LBB32_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB32_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    mov.w r12, #1
+; CHECK-THUMB7-NEXT:    cmp r1, #2
+; CHECK-THUMB7-NEXT:    it lo
+; CHECK-THUMB7-NEXT:    movlo r12, r1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB7-NEXT:  .LBB32_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB32_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, r1
+; CHECK-THUMB7-NEXT:    bne .LBB32_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB32_2 Depth=2
+; CHECK-THUMB7-NEXT:    strex r2, r12, [r3]
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    bne .LBB32_2
+; CHECK-THUMB7-NEXT:  .LBB32_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB32_1 Depth=1
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    subs r1, r0, r1
+; CHECK-THUMB7-NEXT:    clz r1, r1
+; CHECK-THUMB7-NEXT:    lsrs r1, r1, #5
+; CHECK-THUMB7-NEXT:    cmp r1, #1
+; CHECK-THUMB7-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB32_1
+; CHECK-THUMB7-NEXT:    b .LBB32_5
+; CHECK-THUMB7-NEXT:  .LBB32_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #8
+; CHECK-THUMB7-NEXT:    bx lr
+;
+; CHECK-THUMB6-LABEL: test_umin_i32:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI32_0
+; CHECK-THUMB6-NEXT:    movs r1, #1
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_umin_4
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI32_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i32
+;
+; CHECK-THUMB8BASE-LABEL: test_umin_i32:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #20
+; CHECK-THUMB8BASE-NEXT:    sub sp, #20
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [r0]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB32_1
+; CHECK-THUMB8BASE-NEXT:  .LBB32_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ Child Loop BB32_4 Depth 2
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    movs r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cmp r0, #2
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    blo .LBB32_3
+; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB32_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB32_3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB32_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    movw r3, :lower16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:    movt r3, :upper16:atomic_i32
+; CHECK-THUMB8BASE-NEXT:  .LBB32_4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ Parent Loop BB32_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB8BASE-NEXT:    ldrex r0, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp r0, r1
+; CHECK-THUMB8BASE-NEXT:    bne .LBB32_6
+; CHECK-THUMB8BASE-NEXT:  @ %bb.5: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB32_4 Depth=2
+; CHECK-THUMB8BASE-NEXT:    strex r2, r4, [r3]
+; CHECK-THUMB8BASE-NEXT:    cmp.w r2, #0
+; CHECK-THUMB8BASE-NEXT:    bne .LBB32_4
+; CHECK-THUMB8BASE-NEXT:  .LBB32_6: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB32_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r1, r0, r1
+; CHECK-THUMB8BASE-NEXT:    rsbs r2, r1, #0
+; CHECK-THUMB8BASE-NEXT:    adcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    cmp r1, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bne .LBB32_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB32_7
+; CHECK-THUMB8BASE-NEXT:  .LBB32_7: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #20
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw umin i32* @atomic_i32, i32 1 monotonic
   ret i32 %0
 }
 
 define i64 @test_xchg_i64() {
-; COMMON-LABEL: test_xchg_i64:
-; EXPAND64: ldrexd
-; EXPAND64-NOT: str
-; EXPAND64: strexd
-; THUMB1: bl __sync_lock_test_and_set_8
-; BASELINE64: bl __atomic_exchange_8
+; CHECK-ARM8-LABEL: test_xchg_i64:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .pad #16
+; CHECK-ARM8-NEXT:    sub sp, sp, #16
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    ldr r1, [r0]
+; CHECK-ARM8-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB33_1
+; CHECK-ARM8-NEXT:  .LBB33_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB33_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    mov r6, r2
+; CHECK-ARM8-NEXT:    mov r7, r1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    mov r0, #0
+; CHECK-ARM8-NEXT:    mov r8, #1
+; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM8-NEXT:    mov r9, r0
+; CHECK-ARM8-NEXT:  .LBB33_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB33_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM8-NEXT:    cmp r4, r6
+; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    bne .LBB33_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB33_2 Depth=2
+; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    bne .LBB33_2
+; CHECK-ARM8-NEXT:  .LBB33_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB33_1 Depth=1
+; CHECK-ARM8-NEXT:    mov r0, r5
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r3, r0, r1
+; CHECK-ARM8-NEXT:    mov r1, r4
+; CHECK-ARM8-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r2, r1, r2
+; CHECK-ARM8-NEXT:    orr r2, r2, r3
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB33_1
+; CHECK-ARM8-NEXT:    b .LBB33_5
+; CHECK-ARM8-NEXT:  .LBB33_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #16
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; CHECK-ARM6-LABEL: test_xchg_i64:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .pad #16
+; CHECK-ARM6-NEXT:    sub sp, sp, #16
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI33_0
+; CHECK-ARM6-NEXT:    ldr r1, [r0]
+; CHECK-ARM6-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB33_1
+; CHECK-ARM6-NEXT:  .LBB33_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB33_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    mov r6, r2
+; CHECK-ARM6-NEXT:    mov r7, r1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI33_0
+; CHECK-ARM6-NEXT:    mov r0, #0
+; CHECK-ARM6-NEXT:    mov r8, #1
+; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM6-NEXT:    mov r9, r0
+; CHECK-ARM6-NEXT:  .LBB33_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB33_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM6-NEXT:    cmp r4, r6
+; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    bne .LBB33_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB33_2 Depth=2
+; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    bne .LBB33_2
+; CHECK-ARM6-NEXT:  .LBB33_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB33_1 Depth=1
+; CHECK-ARM6-NEXT:    mov r0, r5
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r3, r0, r1
+; CHECK-ARM6-NEXT:    mov r1, r4
+; CHECK-ARM6-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r2, r1, r2
+; CHECK-ARM6-NEXT:    orr r2, r2, r3
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB33_1
+; CHECK-ARM6-NEXT:    b .LBB33_5
+; CHECK-ARM6-NEXT:  .LBB33_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #16
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI33_0:
+; CHECK-ARM6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB7-LABEL: test_xchg_i64:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .pad #16
+; CHECK-THUMB7-NEXT:    sub sp, #16
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    ldr r1, [r0]
+; CHECK-THUMB7-NEXT:    ldr r0, [r0, #4]
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB33_1
+; CHECK-THUMB7-NEXT:  .LBB33_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB33_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    mov r6, r2
+; CHECK-THUMB7-NEXT:    mov r7, r1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    movs r0, #0
+; CHECK-THUMB7-NEXT:    mov.w r8, #1
+; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-THUMB7-NEXT:    mov r9, r0
+; CHECK-THUMB7-NEXT:  .LBB33_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB33_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    it eq
+; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    bne .LBB33_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB33_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    bne .LBB33_2
+; CHECK-THUMB7-NEXT:  .LBB33_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB33_1 Depth=1
+; CHECK-THUMB7-NEXT:    mov r0, r5
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eor.w r3, r0, r1
+; CHECK-THUMB7-NEXT:    mov r1, r4
+; CHECK-THUMB7-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eors r2, r1
+; CHECK-THUMB7-NEXT:    orrs r2, r3
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB33_1
+; CHECK-THUMB7-NEXT:    b .LBB33_5
+; CHECK-THUMB7-NEXT:  .LBB33_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #16
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+;
+; CHECK-THUMB6-LABEL: test_xchg_i64:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI33_0
+; CHECK-THUMB6-NEXT:    movs r2, #1
+; CHECK-THUMB6-NEXT:    movs r3, #0
+; CHECK-THUMB6-NEXT:    bl __sync_lock_test_and_set_8
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI33_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB8BASE-LABEL: test_xchg_i64:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r7, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r7, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movs r3, #0
+; CHECK-THUMB8BASE-NEXT:    str r3, [sp]
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movs r2, #1
+; CHECK-THUMB8BASE-NEXT:    bl __atomic_exchange_8
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r7, pc}
 entry:
   %0 = atomicrmw xchg i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
 }
 define i64 @test_add_i64() {
-; COMMON-LABEL: test_add_i64:
-; EXPAND64: ldrexd
-; EXPAND64-NOT: str
-; EXPAND64: strexd
-; THUMB1: bl __sync_fetch_and_add_8
-; BASELINE64: bl __atomic_fetch_add_8
+; CHECK-ARM8-LABEL: test_add_i64:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .pad #16
+; CHECK-ARM8-NEXT:    sub sp, sp, #16
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    ldr r1, [r0]
+; CHECK-ARM8-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB34_1
+; CHECK-ARM8-NEXT:  .LBB34_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB34_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    mov r6, r2
+; CHECK-ARM8-NEXT:    mov r7, r1
+; CHECK-ARM8-NEXT:    adds r8, r2, #1
+; CHECK-ARM8-NEXT:    adc r0, r1, #0
+; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM8-NEXT:    mov r9, r0
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:  .LBB34_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB34_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM8-NEXT:    cmp r4, r6
+; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    bne .LBB34_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB34_2 Depth=2
+; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    bne .LBB34_2
+; CHECK-ARM8-NEXT:  .LBB34_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB34_1 Depth=1
+; CHECK-ARM8-NEXT:    mov r0, r5
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r3, r0, r1
+; CHECK-ARM8-NEXT:    mov r1, r4
+; CHECK-ARM8-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r2, r1, r2
+; CHECK-ARM8-NEXT:    orr r2, r2, r3
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB34_1
+; CHECK-ARM8-NEXT:    b .LBB34_5
+; CHECK-ARM8-NEXT:  .LBB34_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #16
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; CHECK-ARM6-LABEL: test_add_i64:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .pad #16
+; CHECK-ARM6-NEXT:    sub sp, sp, #16
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI34_0
+; CHECK-ARM6-NEXT:    ldr r1, [r0]
+; CHECK-ARM6-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB34_1
+; CHECK-ARM6-NEXT:  .LBB34_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB34_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    mov r6, r2
+; CHECK-ARM6-NEXT:    mov r7, r1
+; CHECK-ARM6-NEXT:    adds r8, r2, #1
+; CHECK-ARM6-NEXT:    adc r0, r1, #0
+; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM6-NEXT:    mov r9, r0
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI34_0
+; CHECK-ARM6-NEXT:  .LBB34_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB34_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM6-NEXT:    cmp r4, r6
+; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    bne .LBB34_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB34_2 Depth=2
+; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    bne .LBB34_2
+; CHECK-ARM6-NEXT:  .LBB34_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB34_1 Depth=1
+; CHECK-ARM6-NEXT:    mov r0, r5
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r3, r0, r1
+; CHECK-ARM6-NEXT:    mov r1, r4
+; CHECK-ARM6-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r2, r1, r2
+; CHECK-ARM6-NEXT:    orr r2, r2, r3
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB34_1
+; CHECK-ARM6-NEXT:    b .LBB34_5
+; CHECK-ARM6-NEXT:  .LBB34_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #16
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI34_0:
+; CHECK-ARM6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB7-LABEL: test_add_i64:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .pad #16
+; CHECK-THUMB7-NEXT:    sub sp, #16
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    ldr r1, [r0]
+; CHECK-THUMB7-NEXT:    ldr r0, [r0, #4]
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB34_1
+; CHECK-THUMB7-NEXT:  .LBB34_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB34_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    mov r6, r2
+; CHECK-THUMB7-NEXT:    mov r7, r1
+; CHECK-THUMB7-NEXT:    adds.w r8, r2, #1
+; CHECK-THUMB7-NEXT:    adc r0, r1, #0
+; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-THUMB7-NEXT:    mov r9, r0
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:  .LBB34_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB34_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    it eq
+; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    bne .LBB34_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB34_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    bne .LBB34_2
+; CHECK-THUMB7-NEXT:  .LBB34_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB34_1 Depth=1
+; CHECK-THUMB7-NEXT:    mov r0, r5
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eor.w r3, r0, r1
+; CHECK-THUMB7-NEXT:    mov r1, r4
+; CHECK-THUMB7-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eors r2, r1
+; CHECK-THUMB7-NEXT:    orrs r2, r3
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB34_1
+; CHECK-THUMB7-NEXT:    b .LBB34_5
+; CHECK-THUMB7-NEXT:  .LBB34_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #16
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+;
+; CHECK-THUMB6-LABEL: test_add_i64:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI34_0
+; CHECK-THUMB6-NEXT:    movs r2, #1
+; CHECK-THUMB6-NEXT:    movs r3, #0
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_add_8
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI34_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB8BASE-LABEL: test_add_i64:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r7, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r7, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movs r3, #0
+; CHECK-THUMB8BASE-NEXT:    str r3, [sp]
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movs r2, #1
+; CHECK-THUMB8BASE-NEXT:    bl __atomic_fetch_add_8
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r7, pc}
 entry:
   %0 = atomicrmw add i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
 }
 define i64 @test_sub_i64() {
-; COMMON-LABEL: test_sub_i64:
-; EXPAND64: ldrexd
-; EXPAND64-NOT: str
-; EXPAND64: strexd
-; THUMB1: bl __sync_fetch_and_sub_8
-; BASELINE64: bl __atomic_fetch_sub_8
+; CHECK-ARM8-LABEL: test_sub_i64:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .pad #16
+; CHECK-ARM8-NEXT:    sub sp, sp, #16
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    ldr r1, [r0]
+; CHECK-ARM8-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB35_1
+; CHECK-ARM8-NEXT:  .LBB35_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB35_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    mov r6, r2
+; CHECK-ARM8-NEXT:    mov r7, r1
+; CHECK-ARM8-NEXT:    subs r8, r2, #1
+; CHECK-ARM8-NEXT:    sbc r0, r1, #0
+; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM8-NEXT:    mov r9, r0
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:  .LBB35_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB35_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM8-NEXT:    cmp r4, r6
+; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    bne .LBB35_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB35_2 Depth=2
+; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    bne .LBB35_2
+; CHECK-ARM8-NEXT:  .LBB35_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB35_1 Depth=1
+; CHECK-ARM8-NEXT:    mov r0, r5
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r3, r0, r1
+; CHECK-ARM8-NEXT:    mov r1, r4
+; CHECK-ARM8-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r2, r1, r2
+; CHECK-ARM8-NEXT:    orr r2, r2, r3
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB35_1
+; CHECK-ARM8-NEXT:    b .LBB35_5
+; CHECK-ARM8-NEXT:  .LBB35_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #16
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; CHECK-ARM6-LABEL: test_sub_i64:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .pad #16
+; CHECK-ARM6-NEXT:    sub sp, sp, #16
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI35_0
+; CHECK-ARM6-NEXT:    ldr r1, [r0]
+; CHECK-ARM6-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB35_1
+; CHECK-ARM6-NEXT:  .LBB35_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB35_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    mov r6, r2
+; CHECK-ARM6-NEXT:    mov r7, r1
+; CHECK-ARM6-NEXT:    subs r8, r2, #1
+; CHECK-ARM6-NEXT:    sbc r0, r1, #0
+; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM6-NEXT:    mov r9, r0
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI35_0
+; CHECK-ARM6-NEXT:  .LBB35_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB35_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM6-NEXT:    cmp r4, r6
+; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    bne .LBB35_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB35_2 Depth=2
+; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    bne .LBB35_2
+; CHECK-ARM6-NEXT:  .LBB35_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB35_1 Depth=1
+; CHECK-ARM6-NEXT:    mov r0, r5
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r3, r0, r1
+; CHECK-ARM6-NEXT:    mov r1, r4
+; CHECK-ARM6-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r2, r1, r2
+; CHECK-ARM6-NEXT:    orr r2, r2, r3
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB35_1
+; CHECK-ARM6-NEXT:    b .LBB35_5
+; CHECK-ARM6-NEXT:  .LBB35_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #16
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI35_0:
+; CHECK-ARM6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB7-LABEL: test_sub_i64:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .pad #16
+; CHECK-THUMB7-NEXT:    sub sp, #16
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    ldr r1, [r0]
+; CHECK-THUMB7-NEXT:    ldr r0, [r0, #4]
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB35_1
+; CHECK-THUMB7-NEXT:  .LBB35_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB35_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    mov r6, r2
+; CHECK-THUMB7-NEXT:    mov r7, r1
+; CHECK-THUMB7-NEXT:    subs.w r8, r2, #1
+; CHECK-THUMB7-NEXT:    sbc r0, r1, #0
+; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-THUMB7-NEXT:    mov r9, r0
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:  .LBB35_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB35_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    it eq
+; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    bne .LBB35_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB35_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    bne .LBB35_2
+; CHECK-THUMB7-NEXT:  .LBB35_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB35_1 Depth=1
+; CHECK-THUMB7-NEXT:    mov r0, r5
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eor.w r3, r0, r1
+; CHECK-THUMB7-NEXT:    mov r1, r4
+; CHECK-THUMB7-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eors r2, r1
+; CHECK-THUMB7-NEXT:    orrs r2, r3
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB35_1
+; CHECK-THUMB7-NEXT:    b .LBB35_5
+; CHECK-THUMB7-NEXT:  .LBB35_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #16
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+;
+; CHECK-THUMB6-LABEL: test_sub_i64:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI35_0
+; CHECK-THUMB6-NEXT:    movs r2, #1
+; CHECK-THUMB6-NEXT:    movs r3, #0
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_sub_8
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI35_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB8BASE-LABEL: test_sub_i64:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r7, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r7, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movs r3, #0
+; CHECK-THUMB8BASE-NEXT:    str r3, [sp]
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movs r2, #1
+; CHECK-THUMB8BASE-NEXT:    bl __atomic_fetch_sub_8
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r7, pc}
 entry:
   %0 = atomicrmw sub i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
 }
 define i64 @test_and_i64() {
-; COMMON-LABEL: test_and_i64:
-; EXPAND64: ldrexd
-; EXPAND64-NOT: str
-; EXPAND64: strexd
-; THUMB1: bl __sync_fetch_and_and_8
-; BASELINE64: bl __atomic_fetch_and_8
+; CHECK-ARM8-LABEL: test_and_i64:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .pad #16
+; CHECK-ARM8-NEXT:    sub sp, sp, #16
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    ldr r1, [r0]
+; CHECK-ARM8-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB36_1
+; CHECK-ARM8-NEXT:  .LBB36_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB36_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    and r8, r2, #1
+; CHECK-ARM8-NEXT:    mov r0, #0
+; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM8-NEXT:    mov r9, r0
+; CHECK-ARM8-NEXT:    mov r6, r2
+; CHECK-ARM8-NEXT:    mov r7, r1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:  .LBB36_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB36_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM8-NEXT:    cmp r4, r6
+; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    bne .LBB36_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB36_2 Depth=2
+; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    bne .LBB36_2
+; CHECK-ARM8-NEXT:  .LBB36_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB36_1 Depth=1
+; CHECK-ARM8-NEXT:    mov r0, r5
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r3, r0, r1
+; CHECK-ARM8-NEXT:    mov r1, r4
+; CHECK-ARM8-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r2, r1, r2
+; CHECK-ARM8-NEXT:    orr r2, r2, r3
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB36_1
+; CHECK-ARM8-NEXT:    b .LBB36_5
+; CHECK-ARM8-NEXT:  .LBB36_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #16
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; CHECK-ARM6-LABEL: test_and_i64:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .pad #16
+; CHECK-ARM6-NEXT:    sub sp, sp, #16
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI36_0
+; CHECK-ARM6-NEXT:    ldr r1, [r0]
+; CHECK-ARM6-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB36_1
+; CHECK-ARM6-NEXT:  .LBB36_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB36_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    and r8, r2, #1
+; CHECK-ARM6-NEXT:    mov r0, #0
+; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM6-NEXT:    mov r9, r0
+; CHECK-ARM6-NEXT:    mov r6, r2
+; CHECK-ARM6-NEXT:    mov r7, r1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI36_0
+; CHECK-ARM6-NEXT:  .LBB36_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB36_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM6-NEXT:    cmp r4, r6
+; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    bne .LBB36_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB36_2 Depth=2
+; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    bne .LBB36_2
+; CHECK-ARM6-NEXT:  .LBB36_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB36_1 Depth=1
+; CHECK-ARM6-NEXT:    mov r0, r5
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r3, r0, r1
+; CHECK-ARM6-NEXT:    mov r1, r4
+; CHECK-ARM6-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r2, r1, r2
+; CHECK-ARM6-NEXT:    orr r2, r2, r3
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB36_1
+; CHECK-ARM6-NEXT:    b .LBB36_5
+; CHECK-ARM6-NEXT:  .LBB36_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #16
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI36_0:
+; CHECK-ARM6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB7-LABEL: test_and_i64:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .pad #16
+; CHECK-THUMB7-NEXT:    sub sp, #16
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    ldr r1, [r0]
+; CHECK-THUMB7-NEXT:    ldr r0, [r0, #4]
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB36_1
+; CHECK-THUMB7-NEXT:  .LBB36_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB36_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    and r8, r2, #1
+; CHECK-THUMB7-NEXT:    movs r0, #0
+; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-THUMB7-NEXT:    mov r9, r0
+; CHECK-THUMB7-NEXT:    mov r6, r2
+; CHECK-THUMB7-NEXT:    mov r7, r1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:  .LBB36_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB36_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    it eq
+; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    bne .LBB36_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB36_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    bne .LBB36_2
+; CHECK-THUMB7-NEXT:  .LBB36_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB36_1 Depth=1
+; CHECK-THUMB7-NEXT:    mov r0, r5
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eor.w r3, r0, r1
+; CHECK-THUMB7-NEXT:    mov r1, r4
+; CHECK-THUMB7-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eors r2, r1
+; CHECK-THUMB7-NEXT:    orrs r2, r3
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB36_1
+; CHECK-THUMB7-NEXT:    b .LBB36_5
+; CHECK-THUMB7-NEXT:  .LBB36_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #16
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+;
+; CHECK-THUMB6-LABEL: test_and_i64:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI36_0
+; CHECK-THUMB6-NEXT:    movs r2, #1
+; CHECK-THUMB6-NEXT:    movs r3, #0
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_and_8
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI36_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB8BASE-LABEL: test_and_i64:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r7, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r7, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movs r3, #0
+; CHECK-THUMB8BASE-NEXT:    str r3, [sp]
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movs r2, #1
+; CHECK-THUMB8BASE-NEXT:    bl __atomic_fetch_and_8
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r7, pc}
 entry:
   %0 = atomicrmw and i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
 }
 define i64 @test_nand_i64() {
-; COMMON-LABEL: test_nand_i64:
-; EXPAND64: ldrexd
-; EXPAND64-NOT: str
-; EXPAND64: strexd
-; THUMB1: bl __sync_fetch_and_nand_8
-; BASELINE64: bl __atomic_fetch_nand_8
+; CHECK-ARM8-LABEL: test_nand_i64:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .pad #16
+; CHECK-ARM8-NEXT:    sub sp, sp, #16
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    ldr r1, [r0]
+; CHECK-ARM8-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB37_1
+; CHECK-ARM8-NEXT:  .LBB37_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB37_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    mov r6, r2
+; CHECK-ARM8-NEXT:    mov r7, r1
+; CHECK-ARM8-NEXT:    mvn r0, r2
+; CHECK-ARM8-NEXT:    mvn r3, #1
+; CHECK-ARM8-NEXT:    orr r8, r0, r3
+; CHECK-ARM8-NEXT:    mvn r0, #0
+; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM8-NEXT:    mov r9, r0
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:  .LBB37_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB37_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM8-NEXT:    cmp r4, r6
+; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    bne .LBB37_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB37_2 Depth=2
+; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    bne .LBB37_2
+; CHECK-ARM8-NEXT:  .LBB37_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB37_1 Depth=1
+; CHECK-ARM8-NEXT:    mov r0, r5
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r3, r0, r1
+; CHECK-ARM8-NEXT:    mov r1, r4
+; CHECK-ARM8-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r2, r1, r2
+; CHECK-ARM8-NEXT:    orr r2, r2, r3
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB37_1
+; CHECK-ARM8-NEXT:    b .LBB37_5
+; CHECK-ARM8-NEXT:  .LBB37_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #16
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; CHECK-ARM6-LABEL: test_nand_i64:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .pad #16
+; CHECK-ARM6-NEXT:    sub sp, sp, #16
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI37_0
+; CHECK-ARM6-NEXT:    ldr r1, [r0]
+; CHECK-ARM6-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB37_1
+; CHECK-ARM6-NEXT:  .LBB37_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB37_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    mov r6, r2
+; CHECK-ARM6-NEXT:    mov r7, r1
+; CHECK-ARM6-NEXT:    mvn r0, r2
+; CHECK-ARM6-NEXT:    mvn r3, #1
+; CHECK-ARM6-NEXT:    orr r8, r0, r3
+; CHECK-ARM6-NEXT:    mvn r0, #0
+; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM6-NEXT:    mov r9, r0
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI37_0
+; CHECK-ARM6-NEXT:  .LBB37_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB37_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM6-NEXT:    cmp r4, r6
+; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    bne .LBB37_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB37_2 Depth=2
+; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    bne .LBB37_2
+; CHECK-ARM6-NEXT:  .LBB37_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB37_1 Depth=1
+; CHECK-ARM6-NEXT:    mov r0, r5
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r3, r0, r1
+; CHECK-ARM6-NEXT:    mov r1, r4
+; CHECK-ARM6-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r2, r1, r2
+; CHECK-ARM6-NEXT:    orr r2, r2, r3
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB37_1
+; CHECK-ARM6-NEXT:    b .LBB37_5
+; CHECK-ARM6-NEXT:  .LBB37_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #16
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI37_0:
+; CHECK-ARM6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB7-LABEL: test_nand_i64:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .pad #16
+; CHECK-THUMB7-NEXT:    sub sp, #16
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    ldr r1, [r0]
+; CHECK-THUMB7-NEXT:    ldr r0, [r0, #4]
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB37_1
+; CHECK-THUMB7-NEXT:  .LBB37_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB37_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    mov r6, r2
+; CHECK-THUMB7-NEXT:    mov r7, r1
+; CHECK-THUMB7-NEXT:    mvn r0, #1
+; CHECK-THUMB7-NEXT:    orn r8, r0, r2
+; CHECK-THUMB7-NEXT:    mov.w r0, #-1
+; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-THUMB7-NEXT:    mov r9, r0
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:  .LBB37_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB37_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    it eq
+; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    bne .LBB37_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB37_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    bne .LBB37_2
+; CHECK-THUMB7-NEXT:  .LBB37_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB37_1 Depth=1
+; CHECK-THUMB7-NEXT:    mov r0, r5
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eor.w r3, r0, r1
+; CHECK-THUMB7-NEXT:    mov r1, r4
+; CHECK-THUMB7-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eors r2, r1
+; CHECK-THUMB7-NEXT:    orrs r2, r3
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB37_1
+; CHECK-THUMB7-NEXT:    b .LBB37_5
+; CHECK-THUMB7-NEXT:  .LBB37_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #16
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+;
+; CHECK-THUMB6-LABEL: test_nand_i64:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI37_0
+; CHECK-THUMB6-NEXT:    movs r2, #1
+; CHECK-THUMB6-NEXT:    movs r3, #0
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_nand_8
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI37_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB8BASE-LABEL: test_nand_i64:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r7, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r7, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movs r3, #0
+; CHECK-THUMB8BASE-NEXT:    str r3, [sp]
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movs r2, #1
+; CHECK-THUMB8BASE-NEXT:    bl __atomic_fetch_nand_8
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r7, pc}
 entry:
   %0 = atomicrmw nand i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
 }
 define i64 @test_or_i64() {
-; COMMON-LABEL: test_or_i64:
-; EXPAND64: ldrexd
-; EXPAND64-NOT: str
-; EXPAND64: strexd
-; THUMB1: bl __sync_fetch_and_or_8
-; BASELINE64: bl __atomic_fetch_or_8
+; CHECK-ARM8-LABEL: test_or_i64:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .pad #16
+; CHECK-ARM8-NEXT:    sub sp, sp, #16
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    ldr r1, [r0]
+; CHECK-ARM8-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB38_1
+; CHECK-ARM8-NEXT:  .LBB38_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB38_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    orr r8, r2, #1
+; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM8-NEXT:    mov r9, r1
+; CHECK-ARM8-NEXT:    mov r6, r2
+; CHECK-ARM8-NEXT:    mov r7, r1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:  .LBB38_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB38_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM8-NEXT:    cmp r4, r6
+; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    bne .LBB38_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB38_2 Depth=2
+; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    bne .LBB38_2
+; CHECK-ARM8-NEXT:  .LBB38_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB38_1 Depth=1
+; CHECK-ARM8-NEXT:    mov r0, r5
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r3, r0, r1
+; CHECK-ARM8-NEXT:    mov r1, r4
+; CHECK-ARM8-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r2, r1, r2
+; CHECK-ARM8-NEXT:    orr r2, r2, r3
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB38_1
+; CHECK-ARM8-NEXT:    b .LBB38_5
+; CHECK-ARM8-NEXT:  .LBB38_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #16
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; CHECK-ARM6-LABEL: test_or_i64:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .pad #16
+; CHECK-ARM6-NEXT:    sub sp, sp, #16
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI38_0
+; CHECK-ARM6-NEXT:    ldr r1, [r0]
+; CHECK-ARM6-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB38_1
+; CHECK-ARM6-NEXT:  .LBB38_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB38_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    orr r8, r2, #1
+; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM6-NEXT:    mov r9, r1
+; CHECK-ARM6-NEXT:    mov r6, r2
+; CHECK-ARM6-NEXT:    mov r7, r1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI38_0
+; CHECK-ARM6-NEXT:  .LBB38_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB38_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM6-NEXT:    cmp r4, r6
+; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    bne .LBB38_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB38_2 Depth=2
+; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    bne .LBB38_2
+; CHECK-ARM6-NEXT:  .LBB38_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB38_1 Depth=1
+; CHECK-ARM6-NEXT:    mov r0, r5
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r3, r0, r1
+; CHECK-ARM6-NEXT:    mov r1, r4
+; CHECK-ARM6-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r2, r1, r2
+; CHECK-ARM6-NEXT:    orr r2, r2, r3
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB38_1
+; CHECK-ARM6-NEXT:    b .LBB38_5
+; CHECK-ARM6-NEXT:  .LBB38_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #16
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI38_0:
+; CHECK-ARM6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB7-LABEL: test_or_i64:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .pad #16
+; CHECK-THUMB7-NEXT:    sub sp, #16
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    ldr r1, [r0]
+; CHECK-THUMB7-NEXT:    ldr r0, [r0, #4]
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB38_1
+; CHECK-THUMB7-NEXT:  .LBB38_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB38_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    orr r8, r2, #1
+; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-THUMB7-NEXT:    mov r9, r1
+; CHECK-THUMB7-NEXT:    mov r6, r2
+; CHECK-THUMB7-NEXT:    mov r7, r1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:  .LBB38_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB38_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    it eq
+; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    bne .LBB38_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB38_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    bne .LBB38_2
+; CHECK-THUMB7-NEXT:  .LBB38_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB38_1 Depth=1
+; CHECK-THUMB7-NEXT:    mov r0, r5
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eor.w r3, r0, r1
+; CHECK-THUMB7-NEXT:    mov r1, r4
+; CHECK-THUMB7-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eors r2, r1
+; CHECK-THUMB7-NEXT:    orrs r2, r3
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB38_1
+; CHECK-THUMB7-NEXT:    b .LBB38_5
+; CHECK-THUMB7-NEXT:  .LBB38_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #16
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+;
+; CHECK-THUMB6-LABEL: test_or_i64:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI38_0
+; CHECK-THUMB6-NEXT:    movs r2, #1
+; CHECK-THUMB6-NEXT:    movs r3, #0
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_or_8
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI38_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB8BASE-LABEL: test_or_i64:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r7, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r7, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movs r3, #0
+; CHECK-THUMB8BASE-NEXT:    str r3, [sp]
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movs r2, #1
+; CHECK-THUMB8BASE-NEXT:    bl __atomic_fetch_or_8
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r7, pc}
 entry:
   %0 = atomicrmw or i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
 }
 define i64 @test_xor_i64() {
-; COMMON-LABEL: test_xor_i64:
-; EXPAND64: ldrexd
-; EXPAND64-NOT: str
-; EXPAND64: strexd
-; THUMB1: bl __sync_fetch_and_xor_8
-; BASELINE64: bl __atomic_fetch_xor_8
+; CHECK-ARM8-LABEL: test_xor_i64:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .pad #16
+; CHECK-ARM8-NEXT:    sub sp, sp, #16
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    ldr r1, [r0]
+; CHECK-ARM8-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB39_1
+; CHECK-ARM8-NEXT:  .LBB39_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB39_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    eor r8, r2, #1
+; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM8-NEXT:    mov r9, r1
+; CHECK-ARM8-NEXT:    mov r6, r2
+; CHECK-ARM8-NEXT:    mov r7, r1
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:  .LBB39_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB39_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM8-NEXT:    cmp r4, r6
+; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    bne .LBB39_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB39_2 Depth=2
+; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    bne .LBB39_2
+; CHECK-ARM8-NEXT:  .LBB39_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB39_1 Depth=1
+; CHECK-ARM8-NEXT:    mov r0, r5
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r3, r0, r1
+; CHECK-ARM8-NEXT:    mov r1, r4
+; CHECK-ARM8-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r2, r1, r2
+; CHECK-ARM8-NEXT:    orr r2, r2, r3
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB39_1
+; CHECK-ARM8-NEXT:    b .LBB39_5
+; CHECK-ARM8-NEXT:  .LBB39_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #16
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; CHECK-ARM6-LABEL: test_xor_i64:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .pad #16
+; CHECK-ARM6-NEXT:    sub sp, sp, #16
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI39_0
+; CHECK-ARM6-NEXT:    ldr r1, [r0]
+; CHECK-ARM6-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB39_1
+; CHECK-ARM6-NEXT:  .LBB39_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB39_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    eor r8, r2, #1
+; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM6-NEXT:    mov r9, r1
+; CHECK-ARM6-NEXT:    mov r6, r2
+; CHECK-ARM6-NEXT:    mov r7, r1
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI39_0
+; CHECK-ARM6-NEXT:  .LBB39_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB39_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM6-NEXT:    cmp r4, r6
+; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    bne .LBB39_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB39_2 Depth=2
+; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    bne .LBB39_2
+; CHECK-ARM6-NEXT:  .LBB39_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB39_1 Depth=1
+; CHECK-ARM6-NEXT:    mov r0, r5
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r3, r0, r1
+; CHECK-ARM6-NEXT:    mov r1, r4
+; CHECK-ARM6-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r2, r1, r2
+; CHECK-ARM6-NEXT:    orr r2, r2, r3
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB39_1
+; CHECK-ARM6-NEXT:    b .LBB39_5
+; CHECK-ARM6-NEXT:  .LBB39_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #16
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI39_0:
+; CHECK-ARM6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB7-LABEL: test_xor_i64:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .pad #16
+; CHECK-THUMB7-NEXT:    sub sp, #16
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    ldr r1, [r0]
+; CHECK-THUMB7-NEXT:    ldr r0, [r0, #4]
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB39_1
+; CHECK-THUMB7-NEXT:  .LBB39_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB39_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    eor r8, r2, #1
+; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-THUMB7-NEXT:    mov r9, r1
+; CHECK-THUMB7-NEXT:    mov r6, r2
+; CHECK-THUMB7-NEXT:    mov r7, r1
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:  .LBB39_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB39_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    it eq
+; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    bne .LBB39_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB39_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    bne .LBB39_2
+; CHECK-THUMB7-NEXT:  .LBB39_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB39_1 Depth=1
+; CHECK-THUMB7-NEXT:    mov r0, r5
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eor.w r3, r0, r1
+; CHECK-THUMB7-NEXT:    mov r1, r4
+; CHECK-THUMB7-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eors r2, r1
+; CHECK-THUMB7-NEXT:    orrs r2, r3
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB39_1
+; CHECK-THUMB7-NEXT:    b .LBB39_5
+; CHECK-THUMB7-NEXT:  .LBB39_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #16
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+;
+; CHECK-THUMB6-LABEL: test_xor_i64:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI39_0
+; CHECK-THUMB6-NEXT:    movs r2, #1
+; CHECK-THUMB6-NEXT:    movs r3, #0
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_xor_8
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI39_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB8BASE-LABEL: test_xor_i64:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r7, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r7, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #8
+; CHECK-THUMB8BASE-NEXT:    sub sp, #8
+; CHECK-THUMB8BASE-NEXT:    movs r3, #0
+; CHECK-THUMB8BASE-NEXT:    str r3, [sp]
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movs r2, #1
+; CHECK-THUMB8BASE-NEXT:    bl __atomic_fetch_xor_8
+; CHECK-THUMB8BASE-NEXT:    add sp, #8
+; CHECK-THUMB8BASE-NEXT:    pop {r7, pc}
 entry:
   %0 = atomicrmw xor i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
 }
 
 define i64 @test_max_i64() {
-; COMMON-LABEL: test_max_i64:
-; EXPAND64: ldrexd
-; EXPAND64-NOT: str
-; EXPAND64: strexd
-; THUMB1: bl __sync_fetch_and_max_8
-; BASELINE64: bl __atomic_compare_exchange_8
+; CHECK-ARM8-LABEL: test_max_i64:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .pad #16
+; CHECK-ARM8-NEXT:    sub sp, sp, #16
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    ldr r1, [r0]
+; CHECK-ARM8-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB40_1
+; CHECK-ARM8-NEXT:  .LBB40_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB40_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    mov r6, r2
+; CHECK-ARM8-NEXT:    mov r7, r1
+; CHECK-ARM8-NEXT:    rsbs r0, r2, #1
+; CHECK-ARM8-NEXT:    rscs r0, r1, #0
+; CHECK-ARM8-NEXT:    mov r0, #0
+; CHECK-ARM8-NEXT:    movwlt r0, #1
+; CHECK-ARM8-NEXT:    mov r8, #1
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    movne r8, r2
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    movne r0, r1
+; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM8-NEXT:    mov r9, r0
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:  .LBB40_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB40_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM8-NEXT:    cmp r4, r6
+; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    bne .LBB40_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB40_2 Depth=2
+; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    bne .LBB40_2
+; CHECK-ARM8-NEXT:  .LBB40_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB40_1 Depth=1
+; CHECK-ARM8-NEXT:    mov r0, r5
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r3, r0, r1
+; CHECK-ARM8-NEXT:    mov r1, r4
+; CHECK-ARM8-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r2, r1, r2
+; CHECK-ARM8-NEXT:    orr r2, r2, r3
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB40_1
+; CHECK-ARM8-NEXT:    b .LBB40_5
+; CHECK-ARM8-NEXT:  .LBB40_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #16
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; CHECK-ARM6-LABEL: test_max_i64:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .pad #16
+; CHECK-ARM6-NEXT:    sub sp, sp, #16
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI40_0
+; CHECK-ARM6-NEXT:    ldr r1, [r0]
+; CHECK-ARM6-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB40_1
+; CHECK-ARM6-NEXT:  .LBB40_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB40_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    mov r6, r2
+; CHECK-ARM6-NEXT:    mov r7, r1
+; CHECK-ARM6-NEXT:    rsbs r0, r2, #1
+; CHECK-ARM6-NEXT:    rscs r0, r1, #0
+; CHECK-ARM6-NEXT:    mov r0, #0
+; CHECK-ARM6-NEXT:    movlt r0, #1
+; CHECK-ARM6-NEXT:    mov r8, #1
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    movne r8, r2
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    movne r0, r1
+; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM6-NEXT:    mov r9, r0
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI40_0
+; CHECK-ARM6-NEXT:  .LBB40_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB40_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM6-NEXT:    cmp r4, r6
+; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    bne .LBB40_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB40_2 Depth=2
+; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    bne .LBB40_2
+; CHECK-ARM6-NEXT:  .LBB40_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB40_1 Depth=1
+; CHECK-ARM6-NEXT:    mov r0, r5
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r3, r0, r1
+; CHECK-ARM6-NEXT:    mov r1, r4
+; CHECK-ARM6-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r2, r1, r2
+; CHECK-ARM6-NEXT:    orr r2, r2, r3
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB40_1
+; CHECK-ARM6-NEXT:    b .LBB40_5
+; CHECK-ARM6-NEXT:  .LBB40_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #16
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI40_0:
+; CHECK-ARM6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB7-LABEL: test_max_i64:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .pad #16
+; CHECK-THUMB7-NEXT:    sub sp, #16
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    ldr r1, [r0]
+; CHECK-THUMB7-NEXT:    ldr r0, [r0, #4]
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB40_1
+; CHECK-THUMB7-NEXT:  .LBB40_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB40_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    rsbs.w r0, r2, #1
+; CHECK-THUMB7-NEXT:    mov.w r0, #0
+; CHECK-THUMB7-NEXT:    sbcs.w r3, r0, r1
+; CHECK-THUMB7-NEXT:    it lt
+; CHECK-THUMB7-NEXT:    movlt r0, #1
+; CHECK-THUMB7-NEXT:    mov r6, r2
+; CHECK-THUMB7-NEXT:    mov r7, r1
+; CHECK-THUMB7-NEXT:    mov.w r8, #1
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    it ne
+; CHECK-THUMB7-NEXT:    movne r8, r2
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    it ne
+; CHECK-THUMB7-NEXT:    movne r0, r1
+; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-THUMB7-NEXT:    mov r9, r0
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:  .LBB40_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB40_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    it eq
+; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    bne .LBB40_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB40_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    bne .LBB40_2
+; CHECK-THUMB7-NEXT:  .LBB40_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB40_1 Depth=1
+; CHECK-THUMB7-NEXT:    mov r0, r5
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eor.w r3, r0, r1
+; CHECK-THUMB7-NEXT:    mov r1, r4
+; CHECK-THUMB7-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eors r2, r1
+; CHECK-THUMB7-NEXT:    orrs r2, r3
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB40_1
+; CHECK-THUMB7-NEXT:    b .LBB40_5
+; CHECK-THUMB7-NEXT:  .LBB40_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #16
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+;
+; CHECK-THUMB6-LABEL: test_max_i64:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI40_0
+; CHECK-THUMB6-NEXT:    movs r2, #1
+; CHECK-THUMB6-NEXT:    movs r3, #0
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_max_8
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI40_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB8BASE-LABEL: test_max_i64:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #72
+; CHECK-THUMB8BASE-NEXT:    sub sp, #72
+; CHECK-THUMB8BASE-NEXT:    movw r1, :lower16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movt r1, :upper16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [r1, #4]
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [r1]
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #56] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB40_1
+; CHECK-THUMB8BASE-NEXT:  .LBB40_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r3, [sp, #56] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r2, [sp, #60] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #36] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r3, [sp, #40] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    movs r1, #0
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    movs r0, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r3, r0, r3
+; CHECK-THUMB8BASE-NEXT:    sbcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    blt .LBB40_3
+; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB40_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB40_3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB40_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #52] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #28] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cbnz r1, .LBB40_5
+; CHECK-THUMB8BASE-NEXT:  @ %bb.4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB40_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #48] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB40_5: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB40_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cbnz r1, .LBB40_7
+; CHECK-THUMB8BASE-NEXT:  @ %bb.6: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB40_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB40_7: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB40_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r4, [sp, #64]
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #68]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp]
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    add r1, sp, #64
+; CHECK-THUMB8BASE-NEXT:    bl __atomic_compare_exchange_8
+; CHECK-THUMB8BASE-NEXT:    mov r2, r0
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #68]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #64]
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cmp r2, #0
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #56] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    beq .LBB40_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB40_8
+; CHECK-THUMB8BASE-NEXT:  .LBB40_8: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #72
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw max i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
 }
 define i64 @test_min_i64() {
-; COMMON-LABEL: test_min_i64:
-; EXPAND64: ldrexd
-; EXPAND64-NOT: str
-; EXPAND64: strexd
-; THUMB1: bl __sync_fetch_and_min_8
-; BASELINE64: bl __atomic_compare_exchange_8
+; CHECK-ARM8-LABEL: test_min_i64:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .pad #16
+; CHECK-ARM8-NEXT:    sub sp, sp, #16
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    ldr r1, [r0]
+; CHECK-ARM8-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB41_1
+; CHECK-ARM8-NEXT:  .LBB41_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB41_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    mov r6, r2
+; CHECK-ARM8-NEXT:    mov r7, r1
+; CHECK-ARM8-NEXT:    subs r0, r2, #2
+; CHECK-ARM8-NEXT:    sbcs r0, r1, #0
+; CHECK-ARM8-NEXT:    mov r0, #0
+; CHECK-ARM8-NEXT:    movwlt r0, #1
+; CHECK-ARM8-NEXT:    mov r8, #1
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    movne r8, r2
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    movne r0, r1
+; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM8-NEXT:    mov r9, r0
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:  .LBB41_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB41_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM8-NEXT:    cmp r4, r6
+; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    bne .LBB41_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB41_2 Depth=2
+; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    bne .LBB41_2
+; CHECK-ARM8-NEXT:  .LBB41_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB41_1 Depth=1
+; CHECK-ARM8-NEXT:    mov r0, r5
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r3, r0, r1
+; CHECK-ARM8-NEXT:    mov r1, r4
+; CHECK-ARM8-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r2, r1, r2
+; CHECK-ARM8-NEXT:    orr r2, r2, r3
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB41_1
+; CHECK-ARM8-NEXT:    b .LBB41_5
+; CHECK-ARM8-NEXT:  .LBB41_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #16
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; CHECK-ARM6-LABEL: test_min_i64:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .pad #16
+; CHECK-ARM6-NEXT:    sub sp, sp, #16
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI41_0
+; CHECK-ARM6-NEXT:    ldr r1, [r0]
+; CHECK-ARM6-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB41_1
+; CHECK-ARM6-NEXT:  .LBB41_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB41_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    mov r6, r2
+; CHECK-ARM6-NEXT:    mov r7, r1
+; CHECK-ARM6-NEXT:    subs r0, r2, #2
+; CHECK-ARM6-NEXT:    sbcs r0, r1, #0
+; CHECK-ARM6-NEXT:    mov r0, #0
+; CHECK-ARM6-NEXT:    movlt r0, #1
+; CHECK-ARM6-NEXT:    mov r8, #1
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    movne r8, r2
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    movne r0, r1
+; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM6-NEXT:    mov r9, r0
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI41_0
+; CHECK-ARM6-NEXT:  .LBB41_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB41_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM6-NEXT:    cmp r4, r6
+; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    bne .LBB41_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB41_2 Depth=2
+; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    bne .LBB41_2
+; CHECK-ARM6-NEXT:  .LBB41_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB41_1 Depth=1
+; CHECK-ARM6-NEXT:    mov r0, r5
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r3, r0, r1
+; CHECK-ARM6-NEXT:    mov r1, r4
+; CHECK-ARM6-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r2, r1, r2
+; CHECK-ARM6-NEXT:    orr r2, r2, r3
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB41_1
+; CHECK-ARM6-NEXT:    b .LBB41_5
+; CHECK-ARM6-NEXT:  .LBB41_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #16
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI41_0:
+; CHECK-ARM6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB7-LABEL: test_min_i64:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .pad #16
+; CHECK-THUMB7-NEXT:    sub sp, #16
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    ldr r1, [r0]
+; CHECK-THUMB7-NEXT:    ldr r0, [r0, #4]
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB41_1
+; CHECK-THUMB7-NEXT:  .LBB41_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB41_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    mov r6, r2
+; CHECK-THUMB7-NEXT:    mov r7, r1
+; CHECK-THUMB7-NEXT:    subs r0, r2, #2
+; CHECK-THUMB7-NEXT:    sbcs r0, r1, #0
+; CHECK-THUMB7-NEXT:    mov.w r0, #0
+; CHECK-THUMB7-NEXT:    it lt
+; CHECK-THUMB7-NEXT:    movlt r0, #1
+; CHECK-THUMB7-NEXT:    mov.w r8, #1
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    it ne
+; CHECK-THUMB7-NEXT:    movne r8, r2
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    it ne
+; CHECK-THUMB7-NEXT:    movne r0, r1
+; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-THUMB7-NEXT:    mov r9, r0
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:  .LBB41_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB41_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    it eq
+; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    bne .LBB41_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB41_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    bne .LBB41_2
+; CHECK-THUMB7-NEXT:  .LBB41_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB41_1 Depth=1
+; CHECK-THUMB7-NEXT:    mov r0, r5
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eor.w r3, r0, r1
+; CHECK-THUMB7-NEXT:    mov r1, r4
+; CHECK-THUMB7-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eors r2, r1
+; CHECK-THUMB7-NEXT:    orrs r2, r3
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB41_1
+; CHECK-THUMB7-NEXT:    b .LBB41_5
+; CHECK-THUMB7-NEXT:  .LBB41_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #16
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+;
+; CHECK-THUMB6-LABEL: test_min_i64:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI41_0
+; CHECK-THUMB6-NEXT:    movs r2, #1
+; CHECK-THUMB6-NEXT:    movs r3, #0
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_min_8
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI41_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB8BASE-LABEL: test_min_i64:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #72
+; CHECK-THUMB8BASE-NEXT:    sub sp, #72
+; CHECK-THUMB8BASE-NEXT:    movw r1, :lower16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movt r1, :upper16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [r1, #4]
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [r1]
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #56] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB41_1
+; CHECK-THUMB8BASE-NEXT:  .LBB41_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r3, [sp, #56] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #60] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #36] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r3, [sp, #40] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    movs r0, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #44] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    movs r2, #0
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r3, r3, #2
+; CHECK-THUMB8BASE-NEXT:    sbcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    blt .LBB41_3
+; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB41_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #48] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB41_3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB41_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #52] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #28] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cbnz r1, .LBB41_5
+; CHECK-THUMB8BASE-NEXT:  @ %bb.4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB41_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB41_5: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB41_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cbnz r1, .LBB41_7
+; CHECK-THUMB8BASE-NEXT:  @ %bb.6: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB41_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB41_7: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB41_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #48] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r4, [sp, #64]
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #68]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp]
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    add r1, sp, #64
+; CHECK-THUMB8BASE-NEXT:    bl __atomic_compare_exchange_8
+; CHECK-THUMB8BASE-NEXT:    mov r2, r0
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #68]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #64]
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cmp r2, #0
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #56] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    beq .LBB41_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB41_8
+; CHECK-THUMB8BASE-NEXT:  .LBB41_8: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #72
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw min i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
 }
 define i64 @test_umax_i64() {
-; COMMON-LABEL: test_umax_i64:
-; EXPAND64: ldrexd
-; EXPAND64-NOT: str
-; EXPAND64: strexd
-; THUMB1: bl __sync_fetch_and_umax_8
-; BASELINE64: bl __atomic_compare_exchange_8
+; CHECK-ARM8-LABEL: test_umax_i64:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .pad #16
+; CHECK-ARM8-NEXT:    sub sp, sp, #16
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    ldr r1, [r0]
+; CHECK-ARM8-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB42_1
+; CHECK-ARM8-NEXT:  .LBB42_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB42_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    mov r6, r2
+; CHECK-ARM8-NEXT:    mov r7, r1
+; CHECK-ARM8-NEXT:    rsbs r0, r2, #1
+; CHECK-ARM8-NEXT:    rscs r0, r1, #0
+; CHECK-ARM8-NEXT:    mov r0, #0
+; CHECK-ARM8-NEXT:    movwlo r0, #1
+; CHECK-ARM8-NEXT:    mov r8, #1
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    movne r8, r2
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    movne r0, r1
+; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM8-NEXT:    mov r9, r0
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:  .LBB42_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB42_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM8-NEXT:    cmp r4, r6
+; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    bne .LBB42_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB42_2 Depth=2
+; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    bne .LBB42_2
+; CHECK-ARM8-NEXT:  .LBB42_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB42_1 Depth=1
+; CHECK-ARM8-NEXT:    mov r0, r5
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r3, r0, r1
+; CHECK-ARM8-NEXT:    mov r1, r4
+; CHECK-ARM8-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r2, r1, r2
+; CHECK-ARM8-NEXT:    orr r2, r2, r3
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB42_1
+; CHECK-ARM8-NEXT:    b .LBB42_5
+; CHECK-ARM8-NEXT:  .LBB42_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #16
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; CHECK-ARM6-LABEL: test_umax_i64:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .pad #16
+; CHECK-ARM6-NEXT:    sub sp, sp, #16
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI42_0
+; CHECK-ARM6-NEXT:    ldr r1, [r0]
+; CHECK-ARM6-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB42_1
+; CHECK-ARM6-NEXT:  .LBB42_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB42_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    mov r6, r2
+; CHECK-ARM6-NEXT:    mov r7, r1
+; CHECK-ARM6-NEXT:    rsbs r0, r2, #1
+; CHECK-ARM6-NEXT:    rscs r0, r1, #0
+; CHECK-ARM6-NEXT:    mov r0, #0
+; CHECK-ARM6-NEXT:    movlo r0, #1
+; CHECK-ARM6-NEXT:    mov r8, #1
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    movne r8, r2
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    movne r0, r1
+; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM6-NEXT:    mov r9, r0
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI42_0
+; CHECK-ARM6-NEXT:  .LBB42_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB42_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM6-NEXT:    cmp r4, r6
+; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    bne .LBB42_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB42_2 Depth=2
+; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    bne .LBB42_2
+; CHECK-ARM6-NEXT:  .LBB42_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB42_1 Depth=1
+; CHECK-ARM6-NEXT:    mov r0, r5
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r3, r0, r1
+; CHECK-ARM6-NEXT:    mov r1, r4
+; CHECK-ARM6-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r2, r1, r2
+; CHECK-ARM6-NEXT:    orr r2, r2, r3
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB42_1
+; CHECK-ARM6-NEXT:    b .LBB42_5
+; CHECK-ARM6-NEXT:  .LBB42_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #16
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI42_0:
+; CHECK-ARM6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB7-LABEL: test_umax_i64:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .pad #16
+; CHECK-THUMB7-NEXT:    sub sp, #16
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    ldr r1, [r0]
+; CHECK-THUMB7-NEXT:    ldr r0, [r0, #4]
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB42_1
+; CHECK-THUMB7-NEXT:  .LBB42_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB42_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    rsbs.w r0, r2, #1
+; CHECK-THUMB7-NEXT:    mov.w r0, #0
+; CHECK-THUMB7-NEXT:    sbcs.w r3, r0, r1
+; CHECK-THUMB7-NEXT:    it lo
+; CHECK-THUMB7-NEXT:    movlo r0, #1
+; CHECK-THUMB7-NEXT:    mov r6, r2
+; CHECK-THUMB7-NEXT:    mov r7, r1
+; CHECK-THUMB7-NEXT:    mov.w r8, #1
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    it ne
+; CHECK-THUMB7-NEXT:    movne r8, r2
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    it ne
+; CHECK-THUMB7-NEXT:    movne r0, r1
+; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-THUMB7-NEXT:    mov r9, r0
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:  .LBB42_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB42_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    it eq
+; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    bne .LBB42_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB42_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    bne .LBB42_2
+; CHECK-THUMB7-NEXT:  .LBB42_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB42_1 Depth=1
+; CHECK-THUMB7-NEXT:    mov r0, r5
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eor.w r3, r0, r1
+; CHECK-THUMB7-NEXT:    mov r1, r4
+; CHECK-THUMB7-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eors r2, r1
+; CHECK-THUMB7-NEXT:    orrs r2, r3
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB42_1
+; CHECK-THUMB7-NEXT:    b .LBB42_5
+; CHECK-THUMB7-NEXT:  .LBB42_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #16
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+;
+; CHECK-THUMB6-LABEL: test_umax_i64:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI42_0
+; CHECK-THUMB6-NEXT:    movs r2, #1
+; CHECK-THUMB6-NEXT:    movs r3, #0
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_umax_8
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI42_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB8BASE-LABEL: test_umax_i64:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #72
+; CHECK-THUMB8BASE-NEXT:    sub sp, #72
+; CHECK-THUMB8BASE-NEXT:    movw r1, :lower16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movt r1, :upper16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [r1, #4]
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [r1]
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #56] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB42_1
+; CHECK-THUMB8BASE-NEXT:  .LBB42_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r3, [sp, #56] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r2, [sp, #60] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #36] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r3, [sp, #40] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    movs r1, #0
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    movs r0, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r3, r0, r3
+; CHECK-THUMB8BASE-NEXT:    sbcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    blo .LBB42_3
+; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB42_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB42_3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB42_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #52] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #28] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cbnz r1, .LBB42_5
+; CHECK-THUMB8BASE-NEXT:  @ %bb.4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB42_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #48] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB42_5: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB42_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cbnz r1, .LBB42_7
+; CHECK-THUMB8BASE-NEXT:  @ %bb.6: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB42_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB42_7: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB42_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r4, [sp, #64]
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #68]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp]
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    add r1, sp, #64
+; CHECK-THUMB8BASE-NEXT:    bl __atomic_compare_exchange_8
+; CHECK-THUMB8BASE-NEXT:    mov r2, r0
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #68]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #64]
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cmp r2, #0
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #56] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    beq .LBB42_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB42_8
+; CHECK-THUMB8BASE-NEXT:  .LBB42_8: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #72
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw umax i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
 }
 define i64 @test_umin_i64() {
-; COMMON-LABEL: test_umin_i64:
-; EXPAND64: ldrexd
-; EXPAND64-NOT: str
-; EXPAND64: strexd
-; THUMB1: bl __sync_fetch_and_umin_8
-; BASELINE64: bl __atomic_compare_exchange_8
+; CHECK-ARM8-LABEL: test_umin_i64:
+; CHECK-ARM8:       @ %bb.0: @ %entry
+; CHECK-ARM8-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM8-NEXT:    .pad #16
+; CHECK-ARM8-NEXT:    sub sp, sp, #16
+; CHECK-ARM8-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:    ldr r1, [r0]
+; CHECK-ARM8-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    b .LBB43_1
+; CHECK-ARM8-NEXT:  .LBB43_1: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM8-NEXT:    @ Child Loop BB43_2 Depth 2
+; CHECK-ARM8-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    mov r6, r2
+; CHECK-ARM8-NEXT:    mov r7, r1
+; CHECK-ARM8-NEXT:    subs r0, r2, #2
+; CHECK-ARM8-NEXT:    sbcs r0, r1, #0
+; CHECK-ARM8-NEXT:    mov r0, #0
+; CHECK-ARM8-NEXT:    movwlo r0, #1
+; CHECK-ARM8-NEXT:    mov r8, #1
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    movne r8, r2
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    movne r0, r1
+; CHECK-ARM8-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM8-NEXT:    mov r9, r0
+; CHECK-ARM8-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-ARM8-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-ARM8-NEXT:  .LBB43_2: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ Parent Loop BB43_1 Depth=1
+; CHECK-ARM8-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM8-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM8-NEXT:    cmp r4, r6
+; CHECK-ARM8-NEXT:    cmpeq r5, r7
+; CHECK-ARM8-NEXT:    bne .LBB43_4
+; CHECK-ARM8-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB43_2 Depth=2
+; CHECK-ARM8-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM8-NEXT:    cmp r0, #0
+; CHECK-ARM8-NEXT:    bne .LBB43_2
+; CHECK-ARM8-NEXT:  .LBB43_4: @ %atomicrmw.start
+; CHECK-ARM8-NEXT:    @ in Loop: Header=BB43_1 Depth=1
+; CHECK-ARM8-NEXT:    mov r0, r5
+; CHECK-ARM8-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r3, r0, r1
+; CHECK-ARM8-NEXT:    mov r1, r4
+; CHECK-ARM8-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    eor r2, r1, r2
+; CHECK-ARM8-NEXT:    orr r2, r2, r3
+; CHECK-ARM8-NEXT:    cmp r2, #0
+; CHECK-ARM8-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM8-NEXT:    bne .LBB43_1
+; CHECK-ARM8-NEXT:    b .LBB43_5
+; CHECK-ARM8-NEXT:  .LBB43_5: @ %atomicrmw.end
+; CHECK-ARM8-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM8-NEXT:    add sp, sp, #16
+; CHECK-ARM8-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; CHECK-ARM6-LABEL: test_umin_i64:
+; CHECK-ARM6:       @ %bb.0: @ %entry
+; CHECK-ARM6-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-ARM6-NEXT:    .pad #16
+; CHECK-ARM6-NEXT:    sub sp, sp, #16
+; CHECK-ARM6-NEXT:    ldr r0, .LCPI43_0
+; CHECK-ARM6-NEXT:    ldr r1, [r0]
+; CHECK-ARM6-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    b .LBB43_1
+; CHECK-ARM6-NEXT:  .LBB43_1: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-ARM6-NEXT:    @ Child Loop BB43_2 Depth 2
+; CHECK-ARM6-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    mov r6, r2
+; CHECK-ARM6-NEXT:    mov r7, r1
+; CHECK-ARM6-NEXT:    subs r0, r2, #2
+; CHECK-ARM6-NEXT:    sbcs r0, r1, #0
+; CHECK-ARM6-NEXT:    mov r0, #0
+; CHECK-ARM6-NEXT:    movlo r0, #1
+; CHECK-ARM6-NEXT:    mov r8, #1
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    movne r8, r2
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    movne r0, r1
+; CHECK-ARM6-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-ARM6-NEXT:    mov r9, r0
+; CHECK-ARM6-NEXT:    ldr r3, .LCPI43_0
+; CHECK-ARM6-NEXT:  .LBB43_2: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ Parent Loop BB43_1 Depth=1
+; CHECK-ARM6-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-ARM6-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-ARM6-NEXT:    cmp r4, r6
+; CHECK-ARM6-NEXT:    cmpeq r5, r7
+; CHECK-ARM6-NEXT:    bne .LBB43_4
+; CHECK-ARM6-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB43_2 Depth=2
+; CHECK-ARM6-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-ARM6-NEXT:    cmp r0, #0
+; CHECK-ARM6-NEXT:    bne .LBB43_2
+; CHECK-ARM6-NEXT:  .LBB43_4: @ %atomicrmw.start
+; CHECK-ARM6-NEXT:    @ in Loop: Header=BB43_1 Depth=1
+; CHECK-ARM6-NEXT:    mov r0, r5
+; CHECK-ARM6-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r3, r0, r1
+; CHECK-ARM6-NEXT:    mov r1, r4
+; CHECK-ARM6-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    eor r2, r1, r2
+; CHECK-ARM6-NEXT:    orr r2, r2, r3
+; CHECK-ARM6-NEXT:    cmp r2, #0
+; CHECK-ARM6-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-ARM6-NEXT:    bne .LBB43_1
+; CHECK-ARM6-NEXT:    b .LBB43_5
+; CHECK-ARM6-NEXT:  .LBB43_5: @ %atomicrmw.end
+; CHECK-ARM6-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-ARM6-NEXT:    add sp, sp, #16
+; CHECK-ARM6-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; CHECK-ARM6-NEXT:    .p2align 2
+; CHECK-ARM6-NEXT:  @ %bb.6:
+; CHECK-ARM6-NEXT:  .LCPI43_0:
+; CHECK-ARM6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB7-LABEL: test_umin_i64:
+; CHECK-THUMB7:       @ %bb.0: @ %entry
+; CHECK-THUMB7-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-THUMB7-NEXT:    .pad #16
+; CHECK-THUMB7-NEXT:    sub sp, #16
+; CHECK-THUMB7-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:    ldr r1, [r0]
+; CHECK-THUMB7-NEXT:    ldr r0, [r0, #4]
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    b .LBB43_1
+; CHECK-THUMB7-NEXT:  .LBB43_1: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-THUMB7-NEXT:    @ Child Loop BB43_2 Depth 2
+; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    mov r6, r2
+; CHECK-THUMB7-NEXT:    mov r7, r1
+; CHECK-THUMB7-NEXT:    subs r0, r2, #2
+; CHECK-THUMB7-NEXT:    sbcs r0, r1, #0
+; CHECK-THUMB7-NEXT:    mov.w r0, #0
+; CHECK-THUMB7-NEXT:    it lo
+; CHECK-THUMB7-NEXT:    movlo r0, #1
+; CHECK-THUMB7-NEXT:    mov.w r8, #1
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    it ne
+; CHECK-THUMB7-NEXT:    movne r8, r2
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    it ne
+; CHECK-THUMB7-NEXT:    movne r0, r1
+; CHECK-THUMB7-NEXT:    @ kill: def $r8 killed $r8 def $r8_r9
+; CHECK-THUMB7-NEXT:    mov r9, r0
+; CHECK-THUMB7-NEXT:    movw r3, :lower16:atomic_i64
+; CHECK-THUMB7-NEXT:    movt r3, :upper16:atomic_i64
+; CHECK-THUMB7-NEXT:  .LBB43_2: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ Parent Loop BB43_1 Depth=1
+; CHECK-THUMB7-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-THUMB7-NEXT:    ldrexd r4, r5, [r3]
+; CHECK-THUMB7-NEXT:    cmp r4, r6
+; CHECK-THUMB7-NEXT:    it eq
+; CHECK-THUMB7-NEXT:    cmpeq r5, r7
+; CHECK-THUMB7-NEXT:    bne .LBB43_4
+; CHECK-THUMB7-NEXT:  @ %bb.3: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB43_2 Depth=2
+; CHECK-THUMB7-NEXT:    strexd r0, r8, r9, [r3]
+; CHECK-THUMB7-NEXT:    cmp r0, #0
+; CHECK-THUMB7-NEXT:    bne .LBB43_2
+; CHECK-THUMB7-NEXT:  .LBB43_4: @ %atomicrmw.start
+; CHECK-THUMB7-NEXT:    @ in Loop: Header=BB43_1 Depth=1
+; CHECK-THUMB7-NEXT:    mov r0, r5
+; CHECK-THUMB7-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eor.w r3, r0, r1
+; CHECK-THUMB7-NEXT:    mov r1, r4
+; CHECK-THUMB7-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    eors r2, r1
+; CHECK-THUMB7-NEXT:    orrs r2, r3
+; CHECK-THUMB7-NEXT:    cmp r2, #0
+; CHECK-THUMB7-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB7-NEXT:    bne .LBB43_1
+; CHECK-THUMB7-NEXT:    b .LBB43_5
+; CHECK-THUMB7-NEXT:  .LBB43_5: @ %atomicrmw.end
+; CHECK-THUMB7-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-THUMB7-NEXT:    add sp, #16
+; CHECK-THUMB7-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+;
+; CHECK-THUMB6-LABEL: test_umin_i64:
+; CHECK-THUMB6:       @ %bb.0: @ %entry
+; CHECK-THUMB6-NEXT:    .save {r7, lr}
+; CHECK-THUMB6-NEXT:    push {r7, lr}
+; CHECK-THUMB6-NEXT:    ldr r0, .LCPI43_0
+; CHECK-THUMB6-NEXT:    movs r2, #1
+; CHECK-THUMB6-NEXT:    movs r3, #0
+; CHECK-THUMB6-NEXT:    bl __sync_fetch_and_umin_8
+; CHECK-THUMB6-NEXT:    pop {r7, pc}
+; CHECK-THUMB6-NEXT:    .p2align 2
+; CHECK-THUMB6-NEXT:  @ %bb.1:
+; CHECK-THUMB6-NEXT:  .LCPI43_0:
+; CHECK-THUMB6-NEXT:    .long atomic_i64
+;
+; CHECK-THUMB8BASE-LABEL: test_umin_i64:
+; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
+; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
+; CHECK-THUMB8BASE-NEXT:    .pad #72
+; CHECK-THUMB8BASE-NEXT:    sub sp, #72
+; CHECK-THUMB8BASE-NEXT:    movw r1, :lower16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movt r1, :upper16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [r1, #4]
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [r1]
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #56] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    b .LBB43_1
+; CHECK-THUMB8BASE-NEXT:  .LBB43_1: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r3, [sp, #56] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #60] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #36] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r3, [sp, #40] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    movs r0, #1
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #44] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    movs r2, #0
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    subs r3, r3, #2
+; CHECK-THUMB8BASE-NEXT:    sbcs r1, r2
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    blo .LBB43_3
+; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB43_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #48] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB43_3: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB43_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #52] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #28] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cbnz r1, .LBB43_5
+; CHECK-THUMB8BASE-NEXT:  @ %bb.4: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB43_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB43_5: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB43_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cbnz r1, .LBB43_7
+; CHECK-THUMB8BASE-NEXT:  @ %bb.6: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB43_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:  .LBB43_7: @ %atomicrmw.start
+; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB43_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #48] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r4, [sp, #64]
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #68]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp]
+; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i64
+; CHECK-THUMB8BASE-NEXT:    add r1, sp, #64
+; CHECK-THUMB8BASE-NEXT:    bl __atomic_compare_exchange_8
+; CHECK-THUMB8BASE-NEXT:    mov r2, r0
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #68]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #64]
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    cmp r2, #0
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #56] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    beq .LBB43_1
+; CHECK-THUMB8BASE-NEXT:    b .LBB43_8
+; CHECK-THUMB8BASE-NEXT:  .LBB43_8: @ %atomicrmw.end
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #72
+; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw umin i64* @atomic_i64, i64 1 monotonic
   ret i64 %0

diff  --git a/llvm/test/CodeGen/ARM/cmpxchg.mir b/llvm/test/CodeGen/ARM/cmpxchg.mir
index dd640457fb0b1..2b8a5dadc1a98 100644
--- a/llvm/test/CodeGen/ARM/cmpxchg.mir
+++ b/llvm/test/CodeGen/ARM/cmpxchg.mir
@@ -9,38 +9,50 @@ body: |
     liveins: $r0_r1, $r4_r5, $r3, $lr
     ; CHECK-LABEL: name: func
     ; CHECK: successors: %bb.1(0x80000000)
-    ; CHECK: liveins: $r0_r1, $r4_r5, $r3, $lr
-    ; CHECK: .1:
-    ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
-    ; CHECK: liveins: $r4_r5, $r3
-    ; CHECK: $r0_r1 = LDREXD $r3, 14 /* CC::al */, $noreg
-    ; CHECK: CMPrr killed $r0, $r4, 14 /* CC::al */, $noreg, implicit-def $cpsr
-    ; CHECK: CMPrr killed $r1, $r5, 0 /* CC::eq */, killed $cpsr, implicit-def $cpsr
-    ; CHECK: Bcc %bb.3, 1 /* CC::ne */, killed $cpsr
-    ; CHECK: .2:
-    ; CHECK: successors: %bb.1(0x40000000), %bb.3(0x40000000)
-    ; CHECK: liveins: $r4_r5, $r3
-    ; CHECK: early-clobber $r2 = STREXD $r4_r5, $r3, 14 /* CC::al */, $noreg
-    ; CHECK: CMPri killed $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
-    ; CHECK: Bcc %bb.1, 1 /* CC::ne */, killed $cpsr
-    ; CHECK: .3:
+    ; CHECK-NEXT: liveins: $r0_r1, $r4_r5, $r3, $lr
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: .1:
+    ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+    ; CHECK-NEXT: liveins: $r4_r5, $r3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $r0_r1 = LDREXD $r3, 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: CMPrr killed $r0, $r4, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    ; CHECK-NEXT: CMPrr killed $r1, $r5, 0 /* CC::eq */, killed $cpsr, implicit-def $cpsr
+    ; CHECK-NEXT: Bcc %bb.3, 1 /* CC::ne */, killed $cpsr
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: .2:
+    ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
+    ; CHECK-NEXT: liveins: $r4_r5, $r3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: early-clobber $r2 = STREXD $r4_r5, $r3, 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: CMPri killed $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    ; CHECK-NEXT: Bcc %bb.1, 1 /* CC::ne */, killed $cpsr
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: .3:
     ; THUMB-LABEL: name: func
     ; THUMB: successors: %bb.1(0x80000000)
-    ; THUMB: liveins: $r0_r1, $r4_r5, $r3, $lr
-    ; THUMB: .1:
-    ; THUMB: successors: %bb.3(0x40000000), %bb.2(0x40000000)
-    ; THUMB: liveins: $r4, $r5, $r3
-    ; THUMB: $r0, $r1 = t2LDREXD $r3, 14 /* CC::al */, $noreg
-    ; THUMB: tCMPhir killed $r0, $r4, 14 /* CC::al */, $noreg, implicit-def $cpsr
-    ; THUMB: tCMPhir killed $r1, $r5, 0 /* CC::eq */, killed $cpsr, implicit-def $cpsr
-    ; THUMB: tBcc %bb.3, 1 /* CC::ne */, killed $cpsr
-    ; THUMB: .2:
-    ; THUMB: successors: %bb.1(0x40000000), %bb.3(0x40000000)
-    ; THUMB: liveins: $r4, $r5, $r3
-    ; THUMB: early-clobber $r2 = t2STREXD $r4, $r5, $r3, 14 /* CC::al */, $noreg
-    ; THUMB: t2CMPri killed $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
-    ; THUMB: tBcc %bb.1, 1 /* CC::ne */, killed $cpsr
-    ; THUMB: .3:
+    ; THUMB-NEXT: liveins: $r0_r1, $r4_r5, $r3, $lr
+    ; THUMB-NEXT: {{  $}}
+    ; THUMB-NEXT: {{  $}}
+    ; THUMB-NEXT: .1:
+    ; THUMB-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+    ; THUMB-NEXT: liveins: $r4, $r5, $r3
+    ; THUMB-NEXT: {{  $}}
+    ; THUMB-NEXT: $r0, $r1 = t2LDREXD $r3, 14 /* CC::al */, $noreg
+    ; THUMB-NEXT: tCMPhir killed $r0, $r4, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    ; THUMB-NEXT: tCMPhir killed $r1, $r5, 0 /* CC::eq */, killed $cpsr, implicit-def $cpsr
+    ; THUMB-NEXT: tBcc %bb.3, 1 /* CC::ne */, killed $cpsr
+    ; THUMB-NEXT: {{  $}}
+    ; THUMB-NEXT: .2:
+    ; THUMB-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
+    ; THUMB-NEXT: liveins: $r4, $r5, $r3
+    ; THUMB-NEXT: {{  $}}
+    ; THUMB-NEXT: early-clobber $r2 = t2STREXD $r4, $r5, $r3, 14 /* CC::al */, $noreg
+    ; THUMB-NEXT: t2CMPri killed $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    ; THUMB-NEXT: tBcc %bb.1, 1 /* CC::ne */, killed $cpsr
+    ; THUMB-NEXT: {{  $}}
+    ; THUMB-NEXT: .3:
     dead early-clobber renamable $r0_r1, dead early-clobber renamable $r2 = CMP_SWAP_64 killed renamable $r3, killed renamable $r4_r5, renamable $r4_r5 :: (volatile load store monotonic monotonic (s64))
 ...
 ---
@@ -51,35 +63,47 @@ body: |
     liveins: $r1, $r2, $r3, $r12, $lr
     ; CHECK-LABEL: name: func2
     ; CHECK: successors: %bb.1(0x80000000)
-    ; CHECK: liveins: $r1, $r2, $r3, $r12, $lr
-    ; CHECK: .1:
-    ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
-    ; CHECK: liveins: $lr, $r3, $r12
-    ; CHECK: $r1 = LDREX $r3, 14 /* CC::al */, $noreg
-    ; CHECK: CMPrr killed $r1, $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
-    ; CHECK: Bcc %bb.3, 1 /* CC::ne */, killed $cpsr
-    ; CHECK: .2:
-    ; CHECK: successors: %bb.1(0x40000000), %bb.3(0x40000000)
-    ; CHECK: liveins: $lr, $r3, $r12
-    ; CHECK: early-clobber $r2 = STREX $lr, $r3, 14 /* CC::al */, $noreg
-    ; CHECK: CMPri killed $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
-    ; CHECK: Bcc %bb.1, 1 /* CC::ne */, killed $cpsr
-    ; CHECK: .3:
+    ; CHECK-NEXT: liveins: $r1, $r2, $r3, $r12, $lr
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: .1:
+    ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+    ; CHECK-NEXT: liveins: $lr, $r3, $r12
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $r1 = LDREX $r3, 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: CMPrr killed $r1, $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    ; CHECK-NEXT: Bcc %bb.3, 1 /* CC::ne */, killed $cpsr
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: .2:
+    ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
+    ; CHECK-NEXT: liveins: $lr, $r3, $r12
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: early-clobber $r2 = STREX $lr, $r3, 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: CMPri killed $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    ; CHECK-NEXT: Bcc %bb.1, 1 /* CC::ne */, killed $cpsr
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: .3:
     ; THUMB-LABEL: name: func2
     ; THUMB: successors: %bb.1(0x80000000)
-    ; THUMB: liveins: $r1, $r2, $r3, $r12, $lr
-    ; THUMB: .1:
-    ; THUMB: successors: %bb.3(0x40000000), %bb.2(0x40000000)
-    ; THUMB: liveins: $lr, $r3, $r12
-    ; THUMB: $r1 = t2LDREX $r3, 0, 14 /* CC::al */, $noreg
-    ; THUMB: tCMPhir killed $r1, $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
-    ; THUMB: tBcc %bb.3, 1 /* CC::ne */, killed $cpsr
-    ; THUMB: .2:
-    ; THUMB: successors: %bb.1(0x40000000), %bb.3(0x40000000)
-    ; THUMB: liveins: $lr, $r3, $r12
-    ; THUMB: early-clobber $r2 = t2STREX $lr, $r3, 0, 14 /* CC::al */, $noreg
-    ; THUMB: t2CMPri killed $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
-    ; THUMB: tBcc %bb.1, 1 /* CC::ne */, killed $cpsr
-    ; THUMB: .3:
+    ; THUMB-NEXT: liveins: $r1, $r2, $r3, $r12, $lr
+    ; THUMB-NEXT: {{  $}}
+    ; THUMB-NEXT: {{  $}}
+    ; THUMB-NEXT: .1:
+    ; THUMB-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+    ; THUMB-NEXT: liveins: $lr, $r3, $r12
+    ; THUMB-NEXT: {{  $}}
+    ; THUMB-NEXT: $r1 = t2LDREX $r3, 0, 14 /* CC::al */, $noreg
+    ; THUMB-NEXT: tCMPhir killed $r1, $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    ; THUMB-NEXT: tBcc %bb.3, 1 /* CC::ne */, killed $cpsr
+    ; THUMB-NEXT: {{  $}}
+    ; THUMB-NEXT: .2:
+    ; THUMB-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
+    ; THUMB-NEXT: liveins: $lr, $r3, $r12
+    ; THUMB-NEXT: {{  $}}
+    ; THUMB-NEXT: early-clobber $r2 = t2STREX $lr, $r3, 0, 14 /* CC::al */, $noreg
+    ; THUMB-NEXT: t2CMPri killed $r2, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    ; THUMB-NEXT: tBcc %bb.1, 1 /* CC::ne */, killed $cpsr
+    ; THUMB-NEXT: {{  $}}
+    ; THUMB-NEXT: .3:
     dead early-clobber renamable $r1, dead early-clobber renamable $r2 = CMP_SWAP_32 killed renamable $r3, killed renamable $r12, killed renamable $lr
 ...


        


More information about the llvm-commits mailing list