[llvm] ff5a9c3 - [CodeGen] Regenerate test checks (NFC)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 10 05:10:09 PST 2022


Author: Nikita Popov
Date: 2022-02-10T14:10:00+01:00
New Revision: ff5a9c3c653e6bea0af257b6edbf58ae0ce0346e

URL: https://github.com/llvm/llvm-project/commit/ff5a9c3c653e6bea0af257b6edbf58ae0ce0346e
DIFF: https://github.com/llvm/llvm-project/commit/ff5a9c3c653e6bea0af257b6edbf58ae0ce0346e.diff

LOG: [CodeGen] Regenerate test checks (NFC)

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/swifterror.ll
    llvm/test/CodeGen/ARM/swifterror.ll
    llvm/test/CodeGen/X86/swifterror.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll
index 71bac8556d01d..62f32a3083573 100644
--- a/llvm/test/CodeGen/AArch64/swifterror.ll
+++ b/llvm/test/CodeGen/AArch64/swifterror.ll
@@ -1,7 +1,8 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -frame-pointer=all -enable-shrink-wrap=false < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-APPLE --check-prefix=CHECK-APPLE-AARCH64 %s
-; RUN: llc -verify-machineinstrs -frame-pointer=all -O0 -fast-isel < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-O0 --check-prefix=CHECK-O0-AARCH64 %s
+; RUN: llc -verify-machineinstrs -frame-pointer=all -O0 -fast-isel < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-O0-AARCH64 %s
 ; RUN: llc -verify-machineinstrs -frame-pointer=all -enable-shrink-wrap=false < %s -mtriple=arm64_32-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-APPLE --check-prefix=CHECK-APPLE-ARM64_32 %s
-; RUN: llc -verify-machineinstrs -O0 -fast-isel < %s -mtriple=arm64_32-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-O0 --check-prefix=CHECK-O0-ARM64_32 %s
+; RUN: llc -verify-machineinstrs -O0 -fast-isel < %s -mtriple=arm64_32-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-O0-ARM64_32 %s
 
 declare i8* @malloc(i64)
 declare void @free(i8*)
@@ -11,20 +12,53 @@ declare void @free(i8*)
 ; that takes a swifterror parameter and "caller" is the caller of "foo".
 define float @foo(%swift_error** swifterror %error_ptr_ref) {
 ; CHECK-APPLE-LABEL: foo:
-; CHECK-APPLE: mov w0, #16
-; CHECK-APPLE: malloc
-; CHECK-APPLE: mov [[ID:w[0-9]+]], #1
-; CHECK-APPLE: mov x21, x0
-; CHECK-APPLE: strb [[ID]], [x0, #8]
-; CHECK-APPLE-NOT: x21
-
-; CHECK-O0-LABEL: foo:
-; CHECK-O0: mov w{{.*}}, #16
-; CHECK-O0: malloc
-; CHECK-O0: mov x21, x0
-; CHECK-O0-NOT: x21
-; CHECK-O0: mov [[ID:w[0-9]+]], #1
-; CHECK-O0: strb [[ID]], [x0, #8]
+; CHECK-APPLE:       ; %bb.0: ; %entry
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov x29, sp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    mov w0, #16
+; CHECK-APPLE-NEXT:    bl _malloc
+; CHECK-APPLE-NEXT:    mov w8, #1
+; CHECK-APPLE-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-NEXT:    mov x21, x0
+; CHECK-APPLE-NEXT:    strb w8, [x0, #8]
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: foo:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x29, sp
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    mov w8, #16
+; CHECK-O0-AARCH64-NEXT:    mov w0, w8
+; CHECK-O0-AARCH64-NEXT:    bl _malloc
+; CHECK-O0-AARCH64-NEXT:    mov x21, x0
+; CHECK-O0-AARCH64-NEXT:    mov w8, #1
+; CHECK-O0-AARCH64-NEXT:    strb w8, [x0, #8]
+; CHECK-O0-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: foo:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #-16]! ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #16
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w8
+; CHECK-O0-ARM64_32-NEXT:    bl _malloc
+; CHECK-O0-ARM64_32-NEXT:    mov x21, x0
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #1
+; CHECK-O0-ARM64_32-NEXT:    strb w8, [x0, #8]
+; CHECK-O0-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp], #16 ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ret
+
 entry:
   %call = call i8* @malloc(i64 16)
   %call.0 = bitcast i8* %call to %swift_error*
@@ -36,24 +70,136 @@ entry:
 
 ; "caller" calls "foo" that takes a swifterror parameter.
 define float @caller(i8* %error_ref) {
-; CHECK-APPLE-LABEL: caller:
-; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
-; CHECK-APPLE: mov x21, xzr
-; CHECK-APPLE: bl {{.*}}foo
-; CHECK-APPLE: mov x0, x21
-; CHECK-APPLE-AARCH64: cbnz x21
-; CHECK-APPLE-ARM64_32: cbnz w0
+; CHECK-APPLE-AARCH64-LABEL: caller:
+; CHECK-APPLE-AARCH64:       ; %bb.0: ; %entry
+; CHECK-APPLE-AARCH64-NEXT:    sub sp, sp, #64
+; CHECK-APPLE-AARCH64-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x20, x19, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    add x29, sp, #48
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w21, -40
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w22, -48
+; CHECK-APPLE-AARCH64-NEXT:    mov x19, x0
+; CHECK-APPLE-AARCH64-NEXT:    mov x21, xzr
+; CHECK-APPLE-AARCH64-NEXT:    bl _foo
+; CHECK-APPLE-AARCH64-NEXT:    mov x0, x21
+; CHECK-APPLE-AARCH64-NEXT:    cbnz x21, LBB1_2
+; CHECK-APPLE-AARCH64-NEXT:  ; %bb.1: ; %cont
+; CHECK-APPLE-AARCH64-NEXT:    ldrb w8, [x0, #8]
+; CHECK-APPLE-AARCH64-NEXT:    strb w8, [x19]
+; CHECK-APPLE-AARCH64-NEXT:  LBB1_2: ; %handler
+; CHECK-APPLE-AARCH64-NEXT:    bl _free
+; CHECK-APPLE-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-AARCH64-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp x20, x19, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp x22, x21, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    add sp, sp, #64
+; CHECK-APPLE-AARCH64-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: caller:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #64
+; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #48
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-AARCH64-NEXT:    ; implicit-def: $x1
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x21, xzr
+; CHECK-O0-AARCH64-NEXT:    bl _foo
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x0, x21
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    cbnz x21, LBB1_2
+; CHECK-O0-AARCH64-NEXT:  ; %bb.1: ; %cont
+; CHECK-O0-AARCH64-NEXT:    ldr x9, [sp] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x8, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldrb w8, [x8, #8]
+; CHECK-O0-AARCH64-NEXT:    strb w8, [x9]
+; CHECK-O0-AARCH64-NEXT:  LBB1_2: ; %handler
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    bl _free
+; CHECK-O0-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #64
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-APPLE-ARM64_32-LABEL: caller:
+; CHECK-APPLE-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-APPLE-ARM64_32-NEXT:    sub sp, sp, #64
+; CHECK-APPLE-ARM64_32-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x20, x19, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    add x29, sp, #48
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w21, -40
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w22, -48
+; CHECK-APPLE-ARM64_32-NEXT:    mov x19, x0
+; CHECK-APPLE-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-APPLE-ARM64_32-NEXT:    bl _foo
+; CHECK-APPLE-ARM64_32-NEXT:    mov x0, x21
+; CHECK-APPLE-ARM64_32-NEXT:    cbnz w0, LBB1_2
+; CHECK-APPLE-ARM64_32-NEXT:  ; %bb.1: ; %cont
+; CHECK-APPLE-ARM64_32-NEXT:    ldrb w8, [x0, #8]
+; CHECK-APPLE-ARM64_32-NEXT:    strb w8, [x19]
+; CHECK-APPLE-ARM64_32-NEXT:  LBB1_2: ; %handler
+; CHECK-APPLE-ARM64_32-NEXT:    bl _free
+; CHECK-APPLE-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x20, x19, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x22, x21, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    add sp, sp, #64
+; CHECK-APPLE-ARM64_32-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: caller:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #64
+; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x1
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-O0-ARM64_32-NEXT:    bl _foo
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x0, x21
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    cmp x21, #0
+; CHECK-O0-ARM64_32-NEXT:    b.ne LBB1_2
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.1: ; %cont
+; CHECK-O0-ARM64_32-NEXT:    ldr x9, [sp] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x8, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldrb w8, [x8, #8]
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $w0 killed $w8
+; CHECK-O0-ARM64_32-NEXT:    strb w8, [x9]
+; CHECK-O0-ARM64_32-NEXT:  LBB1_2: ; %handler
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    bl _free
+; CHECK-O0-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #64
+; CHECK-O0-ARM64_32-NEXT:    ret
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8]
-; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: bl {{.*}}free
-
-; CHECK-O0-LABEL: caller:
-; CHECK-O0: mov x21
-; CHECK-O0: bl {{.*}}foo
-; CHECK-O0: mov [[ID:x[0-9]+]], x21
-; CHECK-O0-AARCH64: cbnz x21
-; CHECK-O0-ARM64_32: cmp x21, #0
+
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr_ref
@@ -74,27 +220,176 @@ handler:
 
 ; "caller2" is the caller of "foo", it calls "foo" inside a loop.
 define float @caller2(i8* %error_ref) {
-; CHECK-APPLE-LABEL: caller2:
-; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
-; CHECK-APPLE: fmov [[CMP:s[0-9]+]], #1.0
-; CHECK-APPLE: mov x21, xzr
-; CHECK-APPLE: bl {{.*}}foo
-; CHECK-APPLE-AARCH64: cbnz x21
-; CHECK-APPLE-ARM64_32: cbnz w21
-; CHECK-APPLE: fcmp s0, [[CMP]]
-; CHECK-APPLE: b.le
+; CHECK-APPLE-AARCH64-LABEL: caller2:
+; CHECK-APPLE-AARCH64:       ; %bb.0: ; %entry
+; CHECK-APPLE-AARCH64-NEXT:    sub sp, sp, #80
+; CHECK-APPLE-AARCH64-NEXT:    stp d9, d8, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    add x29, sp, #64
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w21, -40
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w22, -48
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset b8, -56
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset b9, -64
+; CHECK-APPLE-AARCH64-NEXT:    mov x19, x0
+; CHECK-APPLE-AARCH64-NEXT:    fmov s8, #1.00000000
+; CHECK-APPLE-AARCH64-NEXT:  LBB2_1: ; %bb_loop
+; CHECK-APPLE-AARCH64-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-APPLE-AARCH64-NEXT:    mov x21, xzr
+; CHECK-APPLE-AARCH64-NEXT:    bl _foo
+; CHECK-APPLE-AARCH64-NEXT:    cbnz x21, LBB2_4
+; CHECK-APPLE-AARCH64-NEXT:  ; %bb.2: ; %cont
+; CHECK-APPLE-AARCH64-NEXT:    ; in Loop: Header=BB2_1 Depth=1
+; CHECK-APPLE-AARCH64-NEXT:    fcmp s0, s8
+; CHECK-APPLE-AARCH64-NEXT:    b.le LBB2_1
+; CHECK-APPLE-AARCH64-NEXT:  ; %bb.3: ; %bb_end
+; CHECK-APPLE-AARCH64-NEXT:    ldrb w8, [x21, #8]
+; CHECK-APPLE-AARCH64-NEXT:    strb w8, [x19]
+; CHECK-APPLE-AARCH64-NEXT:  LBB2_4: ; %handler
+; CHECK-APPLE-AARCH64-NEXT:    mov x0, x21
+; CHECK-APPLE-AARCH64-NEXT:    bl _free
+; CHECK-APPLE-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-AARCH64-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp d9, d8, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    add sp, sp, #80
+; CHECK-APPLE-AARCH64-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: caller2:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #80
+; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #48] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #64
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-AARCH64-NEXT:    ; implicit-def: $x1
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:  LBB2_1: ; %bb_loop
+; CHECK-O0-AARCH64-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-O0-AARCH64-NEXT:    mov x21, xzr
+; CHECK-O0-AARCH64-NEXT:    bl _foo
+; CHECK-O0-AARCH64-NEXT:    str s0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x0, x21
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    cbnz x21, LBB2_4
+; CHECK-O0-AARCH64-NEXT:  ; %bb.2: ; %cont
+; CHECK-O0-AARCH64-NEXT:    ; in Loop: Header=BB2_1 Depth=1
+; CHECK-O0-AARCH64-NEXT:    ldr s0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    fmov s1, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    fcmp s0, s1
+; CHECK-O0-AARCH64-NEXT:    b.le LBB2_1
+; CHECK-O0-AARCH64-NEXT:  ; %bb.3: ; %bb_end
+; CHECK-O0-AARCH64-NEXT:    ldr x9, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldrb w8, [x8, #8]
+; CHECK-O0-AARCH64-NEXT:    strb w8, [x9]
+; CHECK-O0-AARCH64-NEXT:  LBB2_4: ; %handler
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    bl _free
+; CHECK-O0-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x22, x21, [sp, #48] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #80
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-APPLE-ARM64_32-LABEL: caller2:
+; CHECK-APPLE-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-APPLE-ARM64_32-NEXT:    sub sp, sp, #80
+; CHECK-APPLE-ARM64_32-NEXT:    stp d9, d8, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    add x29, sp, #64
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w21, -40
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w22, -48
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset b8, -56
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset b9, -64
+; CHECK-APPLE-ARM64_32-NEXT:    mov x19, x0
+; CHECK-APPLE-ARM64_32-NEXT:    fmov s8, #1.00000000
+; CHECK-APPLE-ARM64_32-NEXT:  LBB2_1: ; %bb_loop
+; CHECK-APPLE-ARM64_32-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-APPLE-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-APPLE-ARM64_32-NEXT:    bl _foo
+; CHECK-APPLE-ARM64_32-NEXT:    cbnz w21, LBB2_4
+; CHECK-APPLE-ARM64_32-NEXT:  ; %bb.2: ; %cont
+; CHECK-APPLE-ARM64_32-NEXT:    ; in Loop: Header=BB2_1 Depth=1
+; CHECK-APPLE-ARM64_32-NEXT:    fcmp s0, s8
+; CHECK-APPLE-ARM64_32-NEXT:    b.le LBB2_1
+; CHECK-APPLE-ARM64_32-NEXT:  ; %bb.3: ; %bb_end
+; CHECK-APPLE-ARM64_32-NEXT:    ldrb w8, [x21, #8]
+; CHECK-APPLE-ARM64_32-NEXT:    strb w8, [x19]
+; CHECK-APPLE-ARM64_32-NEXT:  LBB2_4: ; %handler
+; CHECK-APPLE-ARM64_32-NEXT:    mov x0, x21
+; CHECK-APPLE-ARM64_32-NEXT:    bl _free
+; CHECK-APPLE-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp d9, d8, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    add sp, sp, #80
+; CHECK-APPLE-ARM64_32-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: caller2:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #80
+; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #48] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x1
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:  LBB2_1: ; %bb_loop
+; CHECK-O0-ARM64_32-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-O0-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-O0-ARM64_32-NEXT:    bl _foo
+; CHECK-O0-ARM64_32-NEXT:    str s0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x0, x21
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    cmp x21, #0
+; CHECK-O0-ARM64_32-NEXT:    b.ne LBB2_4
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.2: ; %cont
+; CHECK-O0-ARM64_32-NEXT:    ; in Loop: Header=BB2_1 Depth=1
+; CHECK-O0-ARM64_32-NEXT:    ldr s0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    fmov s1, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    fcmp s0, s1
+; CHECK-O0-ARM64_32-NEXT:    b.le LBB2_1
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.3: ; %bb_end
+; CHECK-O0-ARM64_32-NEXT:    ldr x9, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldrb w8, [x8, #8]
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $w0 killed $w8
+; CHECK-O0-ARM64_32-NEXT:    strb w8, [x9]
+; CHECK-O0-ARM64_32-NEXT:  LBB2_4: ; %handler
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    bl _free
+; CHECK-O0-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldp x22, x21, [sp, #48] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #80
+; CHECK-O0-ARM64_32-NEXT:    ret
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8]
-; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: mov x0, x21
-; CHECK-APPLE: bl {{.*}}free
-
-; CHECK-O0-LABEL: caller2:
-; CHECK-O0: mov x21
-; CHECK-O0: bl {{.*}}foo
-; CHECK-O0: mov [[ID:x[0-9]+]], x21
-; CHECK-O0-AARCH64: cbnz x21
-; CHECK-O0-ARM64_32: cmp x21, #0
+
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   br label %bb_loop
@@ -122,28 +417,83 @@ handler:
 ; under a certain condition.
 define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
 ; CHECK-APPLE-LABEL: foo_if:
-; CHECK-APPLE: cbz w0
-; CHECK-APPLE: mov w0, #16
-; CHECK-APPLE: malloc
-; CHECK-APPLE: mov x21, x0
-; CHECK-APPLE: mov [[ID:w[0-9]+]], #1
-; CHECK-APPLE: strb [[ID]], [x0, #8]
-; CHECK-APPLE-NOT: x21
-; CHECK-APPLE: ret
-
-; CHECK-O0-LABEL: foo_if:
+; CHECK-APPLE:       ; %bb.0: ; %entry
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov x29, sp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    cbz w0, LBB3_2
+; CHECK-APPLE-NEXT:  ; %bb.1: ; %gen_error
+; CHECK-APPLE-NEXT:    mov w0, #16
+; CHECK-APPLE-NEXT:    bl _malloc
+; CHECK-APPLE-NEXT:    mov x21, x0
+; CHECK-APPLE-NEXT:    mov w8, #1
+; CHECK-APPLE-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-NEXT:    strb w8, [x0, #8]
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+; CHECK-APPLE-NEXT:  LBB3_2:
+; CHECK-APPLE-NEXT:    movi d0, #0000000000000000
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: foo_if:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #32
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #16
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    cbz w0, LBB3_2
+; CHECK-O0-AARCH64-NEXT:  ; %bb.1: ; %gen_error
+; CHECK-O0-AARCH64-NEXT:    mov w8, #16
+; CHECK-O0-AARCH64-NEXT:    mov w0, w8
+; CHECK-O0-AARCH64-NEXT:    bl _malloc
+; CHECK-O0-AARCH64-NEXT:    mov x21, x0
+; CHECK-O0-AARCH64-NEXT:    mov w8, #1
+; CHECK-O0-AARCH64-NEXT:    strb w8, [x0, #8]
+; CHECK-O0-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #32
+; CHECK-O0-AARCH64-NEXT:    ret
+; CHECK-O0-AARCH64-NEXT:  LBB3_2: ; %normal
+; CHECK-O0-AARCH64-NEXT:    ldr x21, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    movi d0, #0000000000000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #32
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: foo_if:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #32
+; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    cbz w0, LBB3_2
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.1: ; %gen_error
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #16
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w8
+; CHECK-O0-ARM64_32-NEXT:    bl _malloc
+; CHECK-O0-ARM64_32-NEXT:    mov x21, x0
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #1
+; CHECK-O0-ARM64_32-NEXT:    strb w8, [x0, #8]
+; CHECK-O0-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #32
+; CHECK-O0-ARM64_32-NEXT:    ret
+; CHECK-O0-ARM64_32-NEXT:  LBB3_2: ; %normal
+; CHECK-O0-ARM64_32-NEXT:    ldr x21, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    movi d0, #0000000000000000
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #32
+; CHECK-O0-ARM64_32-NEXT:    ret
+
 ; spill x21
-; CHECK-O0: str x21, [sp, [[SLOT:#[0-9]+]]]
-; CHECK-O0: cbz w0
-; CHECK-O0: mov w{{.*}}, #16
-; CHECK-O0: malloc
-; CHECK-O0: mov x21, x0
-; CHECK-O0: mov [[ID2:w[0-9]+]], #1
-; CHECK-O0: strb [[ID2]], [x0, #8]
-; CHECK-O0: ret
 ; reload from stack
-; CHECK-O0: ldr x21, [sp, [[SLOT]]]
-; CHECK-O0: ret
 entry:
   %cond = icmp ne i32 %cc, 0
   br i1 %cond, label %gen_error, label %normal
@@ -164,59 +514,137 @@ normal:
 ; under a certain condition inside a loop.
 define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float %cc2) {
 ; CHECK-APPLE-LABEL: foo_loop:
-; CHECK-APPLE: mov x0, x21
-; CHECK-APPLE: fcmp
-; CHECK-APPLE: b.gt
-; CHECK-APPLE: cbz
-; CHECK-APPLE: mov w0, #16
-; CHECK-APPLE: malloc
-; CHECK-APPLE: strb w{{.*}}, [x0, #8]
-; CHECK-APPLE: mov x21, x0
-; CHECK-APPLE: ret
-
+; CHECK-APPLE:       ; %bb.0: ; %entry
+; CHECK-APPLE-NEXT:    stp d9, d8, [sp, #-48]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x20, x19, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    add x29, sp, #32
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-NEXT:    .cfi_offset b8, -40
+; CHECK-APPLE-NEXT:    .cfi_offset b9, -48
+; CHECK-APPLE-NEXT:    fmov s8, s0
+; CHECK-APPLE-NEXT:    mov w19, w0
+; CHECK-APPLE-NEXT:    mov x0, x21
+; CHECK-APPLE-NEXT:    mov w20, #1
+; CHECK-APPLE-NEXT:    fmov s9, #1.00000000
+; CHECK-APPLE-NEXT:    b LBB4_2
+; CHECK-APPLE-NEXT:  LBB4_1: ; %bb_cont
+; CHECK-APPLE-NEXT:    ; in Loop: Header=BB4_2 Depth=1
+; CHECK-APPLE-NEXT:    fcmp s8, s9
+; CHECK-APPLE-NEXT:    b.gt LBB4_4
+; CHECK-APPLE-NEXT:  LBB4_2: ; %bb_loop
+; CHECK-APPLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-APPLE-NEXT:    cbz w19, LBB4_1
+; CHECK-APPLE-NEXT:  ; %bb.3: ; %gen_error
+; CHECK-APPLE-NEXT:    ; in Loop: Header=BB4_2 Depth=1
+; CHECK-APPLE-NEXT:    mov w0, #16
+; CHECK-APPLE-NEXT:    bl _malloc
+; CHECK-APPLE-NEXT:    strb w20, [x0, #8]
+; CHECK-APPLE-NEXT:    b LBB4_1
+; CHECK-APPLE-NEXT:  LBB4_4: ; %bb_end
+; CHECK-APPLE-NEXT:    movi d0, #0000000000000000
+; CHECK-APPLE-NEXT:    mov x21, x0
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp d9, d8, [sp], #48 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
 ; CHECK-O0-AARCH64-LABEL: foo_loop:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #48
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #32
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    str s0, [sp, #16] ; 4-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stur w0, [x29, #-12] ; 4-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stur x21, [x29, #-8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    b LBB4_1
+; CHECK-O0-AARCH64-NEXT:  LBB4_1: ; %bb_loop
+; CHECK-O0-AARCH64-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-O0-AARCH64-NEXT:    ldur w8, [x29, #-12] ; 4-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x0, [x29, #-8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    cbz w8, LBB4_3
+; CHECK-O0-AARCH64-NEXT:  ; %bb.2: ; %gen_error
+; CHECK-O0-AARCH64-NEXT:    ; in Loop: Header=BB4_1 Depth=1
+; CHECK-O0-AARCH64-NEXT:    mov w8, #16
+; CHECK-O0-AARCH64-NEXT:    mov w0, w8
+; CHECK-O0-AARCH64-NEXT:    bl _malloc
+; CHECK-O0-AARCH64-NEXT:    mov x9, x0
+; CHECK-O0-AARCH64-NEXT:    mov w8, #1
+; CHECK-O0-AARCH64-NEXT:    strb w8, [x9, #8]
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:  LBB4_3: ; %bb_cont
+; CHECK-O0-AARCH64-NEXT:    ; in Loop: Header=BB4_1 Depth=1
+; CHECK-O0-AARCH64-NEXT:    ldr s0, [sp, #16] ; 4-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    fmov s1, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    fcmp s0, s1
+; CHECK-O0-AARCH64-NEXT:    stur x0, [x29, #-8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    b.le LBB4_1
+; CHECK-O0-AARCH64-NEXT:  ; %bb.4: ; %bb_end
+; CHECK-O0-AARCH64-NEXT:    ldr x21, [sp] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    movi d0, #0000000000000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #48
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: foo_loop:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #48
+; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
+; CHECK-O0-ARM64_32-NEXT:    str s0, [sp, #16] ; 4-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str w0, [sp, #20] ; 4-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    b LBB4_1
+; CHECK-O0-ARM64_32-NEXT:  LBB4_1: ; %bb_loop
+; CHECK-O0-ARM64_32-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-O0-ARM64_32-NEXT:    ldr w8, [sp, #20] ; 4-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    cbz w8, LBB4_3
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.2: ; %gen_error
+; CHECK-O0-ARM64_32-NEXT:    ; in Loop: Header=BB4_1 Depth=1
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #16
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w8
+; CHECK-O0-ARM64_32-NEXT:    bl _malloc
+; CHECK-O0-ARM64_32-NEXT:    mov x9, x0
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $x0 killed $x9
+; CHECK-O0-ARM64_32-NEXT:    mov x0, x9
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #1
+; CHECK-O0-ARM64_32-NEXT:    strb w8, [x9, #8]
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:  LBB4_3: ; %bb_cont
+; CHECK-O0-ARM64_32-NEXT:    ; in Loop: Header=BB4_1 Depth=1
+; CHECK-O0-ARM64_32-NEXT:    ldr s0, [sp, #16] ; 4-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    fmov s1, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    fcmp s0, s1
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    b.le LBB4_1
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.4: ; %bb_end
+; CHECK-O0-ARM64_32-NEXT:    ldr x21, [sp] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    movi d0, #0000000000000000
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #48
+; CHECK-O0-ARM64_32-NEXT:    ret
+
 ; spill x21
-; CHECK-O0-AARCH64: stur x21, [x29, [[SLOT:#-[0-9]+]]]
-; CHECK-O0-AARCH64: b [[BB1:[A-Za-z0-9_]*]]
-; CHECK-O0-AARCH64: [[BB1]]:
-; CHECK-O0-AARCH64: ldur    x0, [x29, [[SLOT]]]
-; CHECK-O0-AARCH64: str     x0, [sp, [[SLOT2:#[0-9]+]]]
-; CHECK-O0-AARCH64: cbz {{.*}}, [[BB2:[A-Za-z0-9_]*]]
-; CHECK-O0-AARCH64: mov w{{.*}}, #16
-; CHECK-O0-AARCH64: malloc
-; CHECK-O0-AARCH64: mov [[ID:x[0-9]+]], x0
-; CHECK-O0-AARCH64: strb w{{.*}}, [{{.*}}[[ID]], #8]
 ; spill x0
-; CHECK-O0-AARCH64: str x0, [sp, [[SLOT2]]]
-; CHECK-O0-AARCH64:[[BB2]]:
-; CHECK-O0-AARCH64: ldr     x0, [sp, [[SLOT2]]]
-; CHECK-O0-AARCH64: fcmp
-; CHECK-O0-AARCH64: stur     x0, [x29, [[SLOT]]]
-; CHECK-O0-AARCH64: b.le [[BB1]]
 ; reload from stack
-; CHECK-O0-AARCH64: ldr x21, [sp]
-; CHECK-O0-AARCH64: ret
 
-; CHECK-O0-ARM64_32-LABEL: foo_loop:
 ; spill x21
-; CHECK-O0-ARM64_32: str x21, [sp, [[SLOT:#[0-9]+]]]
-; CHECK-O0-ARM64_32: b [[BB1:[A-Za-z0-9_]*]]
-; CHECK-O0-ARM64_32: [[BB1]]:
-; CHECK-O0-ARM64_32: ldr     x0, [sp, [[SLOT]]]
-; CHECK-O0-ARM64_32: str     x0, [sp, [[SLOT2:#[0-9]+]]]
-; CHECK-O0-ARM64_32: cbz {{.*}}, [[BB2:[A-Za-z0-9_]*]]
-; CHECK-O0-ARM64_32: mov w{{.*}}, #16
-; CHECK-O0-ARM64_32: malloc
-; CHECK-O0-ARM64_32: mov {{.*}}, x0
-; CHECK-O0-ARM64_32: strb w{{.*}},
-; CHECK-O0-ARM64_32:[[BB2]]:
-; CHECK-O0-ARM64_32: ldr     x0, [sp, [[SLOT2]]]
-; CHECK-O0-ARM64_32: str     x0, [sp[[OFFSET:.*]]]
-; CHECK-O0-ARM64_32: fcmp
-; CHECK-O0-ARM64_32: b.le [[BB1]]
 ; reload from stack
-; CHECK-O0-ARM64_32: ldr x21, [sp[[OFFSET]]]
-; CHECK-O0-ARM64_32: ret
 entry:
   br label %bb_loop
 
@@ -245,27 +673,75 @@ bb_end:
 ; parameter.
 define void @foo_sret(%struct.S* sret(%struct.S) %agg.result, i32 %val1, %swift_error** swifterror %error_ptr_ref) {
 ; CHECK-APPLE-LABEL: foo_sret:
-; CHECK-APPLE: mov [[SRET:x[0-9]+]], x8
-; CHECK-APPLE: mov w0, #16
-; CHECK-APPLE: malloc
-; CHECK-APPLE: mov [[ID:w[0-9]+]], #1
-; CHECK-APPLE: mov x21, x0
-; CHECK-APPLE: strb [[ID]], [x0, #8]
-; CHECK-APPLE: str w{{.*}}, [{{.*}}[[SRET]], #4]
-; CHECK-APPLE-NOT: x21
-
-; CHECK-O0-LABEL: foo_sret:
+; CHECK-APPLE:       ; %bb.0: ; %entry
+; CHECK-APPLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    add x29, sp, #16
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-NEXT:    mov w19, w0
+; CHECK-APPLE-NEXT:    mov x20, x8
+; CHECK-APPLE-NEXT:    mov w0, #16
+; CHECK-APPLE-NEXT:    bl _malloc
+; CHECK-APPLE-NEXT:    mov w8, #1
+; CHECK-APPLE-NEXT:    mov x21, x0
+; CHECK-APPLE-NEXT:    strb w8, [x0, #8]
+; CHECK-APPLE-NEXT:    str w19, [x20, #4]
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: foo_sret:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #32
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #16
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    stur w0, [x29, #-4] ; 4-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x8, [sp] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov w8, #16
+; CHECK-O0-AARCH64-NEXT:    mov w0, w8
+; CHECK-O0-AARCH64-NEXT:    bl _malloc
+; CHECK-O0-AARCH64-NEXT:    ldr x8, [sp] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    mov x10, x0
+; CHECK-O0-AARCH64-NEXT:    ldur w0, [x29, #-4] ; 4-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    mov x21, x10
+; CHECK-O0-AARCH64-NEXT:    mov w9, #1
+; CHECK-O0-AARCH64-NEXT:    strb w9, [x10, #8]
+; CHECK-O0-AARCH64-NEXT:    str w0, [x8, #4]
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #32
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: foo_sret:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #32
+; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
+; CHECK-O0-ARM64_32-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #16
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w8
+; CHECK-O0-ARM64_32-NEXT:    bl _malloc
+; CHECK-O0-ARM64_32-NEXT:    ldr x8, [sp] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    mov x10, x0
+; CHECK-O0-ARM64_32-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    mov x21, x10
+; CHECK-O0-ARM64_32-NEXT:    mov w9, #1
+; CHECK-O0-ARM64_32-NEXT:    strb w9, [x10, #8]
+; CHECK-O0-ARM64_32-NEXT:    str w0, [x8, #4]
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #32
+; CHECK-O0-ARM64_32-NEXT:    ret
+
 ; spill x8
-; CHECK-O0-DAG: str x8
-; CHECK-O0: mov w{{.*}}, #16
-; CHECK-O0: malloc
-; CHECK-O0: mov	x10, x0
-; CHECK-O0: mov	x21, x10
-; CHECK-O0: mov [[ID:w[0-9]+]], #1
-; CHECK-O0: strb [[ID]], [x10, #8]
 ; reload from stack
-; CHECK-O0: str w{{.*}}, [x8, #4]
-; CHECK-O0-NOT: x21
 entry:
   %call = call i8* @malloc(i64 16)
   %call.0 = bitcast i8* %call to %swift_error*
@@ -279,32 +755,147 @@ entry:
 
 ; "caller3" calls "foo_sret" that takes a swifterror parameter.
 define float @caller3(i8* %error_ref) {
-; CHECK-APPLE-LABEL: caller3:
-; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
-; CHECK-APPLE: mov x21, xzr
-; CHECK-APPLE: bl {{.*}}foo_sret
-; CHECK-APPLE: mov x0, x21
-; CHECK-APPLE-AARCH64: cbnz x21
-; CHECK-APPLE-ARM64_32: cbnz w0
+; CHECK-APPLE-AARCH64-LABEL: caller3:
+; CHECK-APPLE-AARCH64:       ; %bb.0: ; %entry
+; CHECK-APPLE-AARCH64-NEXT:    sub sp, sp, #80
+; CHECK-APPLE-AARCH64-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    add x29, sp, #64
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w21, -40
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w22, -48
+; CHECK-APPLE-AARCH64-NEXT:    mov x19, x0
+; CHECK-APPLE-AARCH64-NEXT:    add x8, sp, #8
+; CHECK-APPLE-AARCH64-NEXT:    mov w0, #1
+; CHECK-APPLE-AARCH64-NEXT:    mov x21, xzr
+; CHECK-APPLE-AARCH64-NEXT:    bl _foo_sret
+; CHECK-APPLE-AARCH64-NEXT:    mov x0, x21
+; CHECK-APPLE-AARCH64-NEXT:    cbnz x21, LBB6_2
+; CHECK-APPLE-AARCH64-NEXT:  ; %bb.1: ; %cont
+; CHECK-APPLE-AARCH64-NEXT:    ldrb w8, [x0, #8]
+; CHECK-APPLE-AARCH64-NEXT:    strb w8, [x19]
+; CHECK-APPLE-AARCH64-NEXT:  LBB6_2: ; %handler
+; CHECK-APPLE-AARCH64-NEXT:    bl _free
+; CHECK-APPLE-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-AARCH64-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    add sp, sp, #80
+; CHECK-APPLE-AARCH64-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: caller3:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #96
+; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #64] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #80
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-AARCH64-NEXT:    ; implicit-def: $x1
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x21, xzr
+; CHECK-O0-AARCH64-NEXT:    add x8, sp, #40
+; CHECK-O0-AARCH64-NEXT:    mov w0, #1
+; CHECK-O0-AARCH64-NEXT:    bl _foo_sret
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x0, x21
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    cbnz x21, LBB6_2
+; CHECK-O0-AARCH64-NEXT:  ; %bb.1: ; %cont
+; CHECK-O0-AARCH64-NEXT:    ldr x9, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldrb w8, [x8, #8]
+; CHECK-O0-AARCH64-NEXT:    strb w8, [x9]
+; CHECK-O0-AARCH64-NEXT:  LBB6_2: ; %handler
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    bl _free
+; CHECK-O0-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x22, x21, [sp, #64] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #96
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-APPLE-ARM64_32-LABEL: caller3:
+; CHECK-APPLE-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-APPLE-ARM64_32-NEXT:    sub sp, sp, #80
+; CHECK-APPLE-ARM64_32-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    add x29, sp, #64
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w21, -40
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w22, -48
+; CHECK-APPLE-ARM64_32-NEXT:    mov x19, x0
+; CHECK-APPLE-ARM64_32-NEXT:    add x8, sp, #8
+; CHECK-APPLE-ARM64_32-NEXT:    mov w0, #1
+; CHECK-APPLE-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-APPLE-ARM64_32-NEXT:    bl _foo_sret
+; CHECK-APPLE-ARM64_32-NEXT:    mov x0, x21
+; CHECK-APPLE-ARM64_32-NEXT:    cbnz w0, LBB6_2
+; CHECK-APPLE-ARM64_32-NEXT:  ; %bb.1: ; %cont
+; CHECK-APPLE-ARM64_32-NEXT:    ldrb w8, [x0, #8]
+; CHECK-APPLE-ARM64_32-NEXT:    strb w8, [x19]
+; CHECK-APPLE-ARM64_32-NEXT:  LBB6_2: ; %handler
+; CHECK-APPLE-ARM64_32-NEXT:    bl _free
+; CHECK-APPLE-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    add sp, sp, #80
+; CHECK-APPLE-ARM64_32-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: caller3:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #96
+; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #64] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x1
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-O0-ARM64_32-NEXT:    add x8, sp, #40
+; CHECK-O0-ARM64_32-NEXT:    mov w0, #1
+; CHECK-O0-ARM64_32-NEXT:    bl _foo_sret
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x0, x21
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    cmp x21, #0
+; CHECK-O0-ARM64_32-NEXT:    b.ne LBB6_2
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.1: ; %cont
+; CHECK-O0-ARM64_32-NEXT:    ldr x9, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldrb w8, [x8, #8]
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $w0 killed $w8
+; CHECK-O0-ARM64_32-NEXT:    strb w8, [x9]
+; CHECK-O0-ARM64_32-NEXT:  LBB6_2: ; %handler
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    bl _free
+; CHECK-O0-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldp x22, x21, [sp, #64] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #96
+; CHECK-O0-ARM64_32-NEXT:    ret
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8]
-; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: bl {{.*}}free
 
-; CHECK-O0-LABEL: caller3:
 ; spill x0
-; CHECK-O0: str x0, [sp, [[OFFSET:#[0-9]+]]]
-; CHECK-O0: mov x21
-; CHECK-O0: bl {{.*}}foo_sret
-; CHECK-O0: mov [[ID2:x[0-9]+]], x21
-; CHECK-O0-AARCH64: cbnz x21
-; CHECK-O0-ARM64_32: cmp x21, #0
 ; Access part of the error object and save it to error_ref
 ; reload from stack
-; CHECK-O0: ldr [[ID:x[0-9]+]], [sp, [[OFFSET]]]
-; CHECK-O0: ldrb [[CODE:w[0-9]+]]
-; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-O0: bl {{.*}}free
 entry:
   %s = alloca %struct.S, align 8
   %error_ptr_ref = alloca swifterror %swift_error*
@@ -328,23 +919,144 @@ handler:
 ; variable number of arguments.
 declare void @llvm.va_start(i8*) nounwind
 define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
-; CHECK-APPLE-LABEL: foo_vararg:
-; CHECK-APPLE: mov w0, #16
-; CHECK-APPLE: malloc
+; CHECK-APPLE-AARCH64-LABEL: foo_vararg:
+; CHECK-APPLE-AARCH64:       ; %bb.0: ; %entry
+; CHECK-APPLE-AARCH64-NEXT:    sub sp, sp, #48
+; CHECK-APPLE-AARCH64-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    add x29, sp, #32
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-AARCH64-NEXT:    mov w0, #16
+; CHECK-APPLE-AARCH64-NEXT:    bl _malloc
+; CHECK-APPLE-AARCH64-NEXT:    mov w8, #1
+; CHECK-APPLE-AARCH64-NEXT:    ldr w9, [x29, #16]
+; CHECK-APPLE-AARCH64-NEXT:    add x10, x29, #16
+; CHECK-APPLE-AARCH64-NEXT:    ldr w11, [x29, #32]
+; CHECK-APPLE-AARCH64-NEXT:    strb w8, [x0, #8]
+; CHECK-APPLE-AARCH64-NEXT:    add x8, x10, #24
+; CHECK-APPLE-AARCH64-NEXT:    stur w9, [x29, #-12]
+; CHECK-APPLE-AARCH64-NEXT:    ldr w9, [x29, #24]
+; CHECK-APPLE-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-AARCH64-NEXT:    mov x21, x0
+; CHECK-APPLE-AARCH64-NEXT:    stur x8, [x29, #-8]
+; CHECK-APPLE-AARCH64-NEXT:    stp w11, w9, [sp, #12]
+; CHECK-APPLE-AARCH64-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    add sp, sp, #48
+; CHECK-APPLE-AARCH64-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: foo_vararg:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #48
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #32
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    mov w8, #16
+; CHECK-O0-AARCH64-NEXT:    mov w0, w8
+; CHECK-O0-AARCH64-NEXT:    bl _malloc
+; CHECK-O0-AARCH64-NEXT:    mov x21, x0
+; CHECK-O0-AARCH64-NEXT:    mov w8, #1
+; CHECK-O0-AARCH64-NEXT:    strb w8, [x0, #8]
+; CHECK-O0-AARCH64-NEXT:    add x8, x29, #16
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-8]
+; CHECK-O0-AARCH64-NEXT:    ldur x8, [x29, #-8]
+; CHECK-O0-AARCH64-NEXT:    add x9, x8, #8
+; CHECK-O0-AARCH64-NEXT:    stur x9, [x29, #-8]
+; CHECK-O0-AARCH64-NEXT:    ldr w8, [x8]
+; CHECK-O0-AARCH64-NEXT:    stur w8, [x29, #-12]
+; CHECK-O0-AARCH64-NEXT:    ldur x8, [x29, #-8]
+; CHECK-O0-AARCH64-NEXT:    add x9, x8, #8
+; CHECK-O0-AARCH64-NEXT:    stur x9, [x29, #-8]
+; CHECK-O0-AARCH64-NEXT:    ldr w8, [x8]
+; CHECK-O0-AARCH64-NEXT:    str w8, [sp, #16]
+; CHECK-O0-AARCH64-NEXT:    ldur x8, [x29, #-8]
+; CHECK-O0-AARCH64-NEXT:    add x9, x8, #8
+; CHECK-O0-AARCH64-NEXT:    stur x9, [x29, #-8]
+; CHECK-O0-AARCH64-NEXT:    ldr w8, [x8]
+; CHECK-O0-AARCH64-NEXT:    str w8, [sp, #12]
+; CHECK-O0-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #48
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-APPLE-ARM64_32-LABEL: foo_vararg:
+; CHECK-APPLE-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-APPLE-ARM64_32-NEXT:    sub sp, sp, #48
+; CHECK-APPLE-ARM64_32-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    add x29, sp, #32
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-ARM64_32-NEXT:    mov w0, #16
+; CHECK-APPLE-ARM64_32-NEXT:    bl _malloc
+; CHECK-APPLE-ARM64_32-NEXT:    mov w8, #1
+; CHECK-APPLE-ARM64_32-NEXT:    add x9, x29, #16
+; CHECK-APPLE-ARM64_32-NEXT:    orr w10, w9, #0x4
+; CHECK-APPLE-ARM64_32-NEXT:    and x11, x9, #0xfffffff0
+; CHECK-APPLE-ARM64_32-NEXT:    strb w8, [x0, #8]
+; CHECK-APPLE-ARM64_32-NEXT:    stur w10, [x29, #-8]
+; CHECK-APPLE-ARM64_32-NEXT:    ldr w8, [x11]
+; CHECK-APPLE-ARM64_32-NEXT:    orr w11, w9, #0x8
+; CHECK-APPLE-ARM64_32-NEXT:    stp w8, w11, [x29, #-12]
+; CHECK-APPLE-ARM64_32-NEXT:    orr w8, w9, #0xc
+; CHECK-APPLE-ARM64_32-NEXT:    ldr w9, [x10]
+; CHECK-APPLE-ARM64_32-NEXT:    stur w8, [x29, #-8]
+; CHECK-APPLE-ARM64_32-NEXT:    str w9, [sp, #16]
+; CHECK-APPLE-ARM64_32-NEXT:    ldr w8, [x11]
+; CHECK-APPLE-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-ARM64_32-NEXT:    mov x21, x0
+; CHECK-APPLE-ARM64_32-NEXT:    str w8, [sp, #12]
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    add sp, sp, #48
+; CHECK-APPLE-ARM64_32-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: foo_vararg:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #48
+; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #16
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w8
+; CHECK-O0-ARM64_32-NEXT:    bl _malloc
+; CHECK-O0-ARM64_32-NEXT:    mov x21, x0
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #1
+; CHECK-O0-ARM64_32-NEXT:    strb w8, [x0, #8]
+; CHECK-O0-ARM64_32-NEXT:    add x8, sp, #48
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $w8 killed $w8 killed $x8
+; CHECK-O0-ARM64_32-NEXT:    str w8, [sp, #24]
+; CHECK-O0-ARM64_32-NEXT:    ldr w8, [sp, #24]
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $x8 killed $w8
+; CHECK-O0-ARM64_32-NEXT:    mov w9, w8
+; CHECK-O0-ARM64_32-NEXT:    add w9, w9, #4
+; CHECK-O0-ARM64_32-NEXT:    str w9, [sp, #24]
+; CHECK-O0-ARM64_32-NEXT:    ldr w8, [x8]
+; CHECK-O0-ARM64_32-NEXT:    str w8, [sp, #20]
+; CHECK-O0-ARM64_32-NEXT:    ldr w8, [sp, #24]
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $x8 killed $w8
+; CHECK-O0-ARM64_32-NEXT:    mov w9, w8
+; CHECK-O0-ARM64_32-NEXT:    add w9, w9, #4
+; CHECK-O0-ARM64_32-NEXT:    str w9, [sp, #24]
+; CHECK-O0-ARM64_32-NEXT:    ldr w8, [x8]
+; CHECK-O0-ARM64_32-NEXT:    str w8, [sp, #16]
+; CHECK-O0-ARM64_32-NEXT:    ldr w8, [sp, #24]
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $x8 killed $w8
+; CHECK-O0-ARM64_32-NEXT:    mov w9, w8
+; CHECK-O0-ARM64_32-NEXT:    add w9, w9, #4
+; CHECK-O0-ARM64_32-NEXT:    str w9, [sp, #24]
+; CHECK-O0-ARM64_32-NEXT:    ldr w8, [x8]
+; CHECK-O0-ARM64_32-NEXT:    str w8, [sp, #12]
+; CHECK-O0-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #48
+; CHECK-O0-ARM64_32-NEXT:    ret
 
 ; First vararg
-; CHECK-APPLE-AARCH64: mov [[ID:w[0-9]+]], #1
-; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP:x[0-9]+]], #16]
-; CHECK-APPLE-AARCH64: add [[ARGS:x[0-9]+]], [[TMP]], #16
 ; Third vararg
-; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #32]
-; CHECK-APPLE-AARCH64: strb [[ID]], [x0, #8]
 ; Second vararg
-; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #24]
 
-; CHECK-APPLE-ARM64_32: mov [[ID:w[0-9]+]], #1
-; CHECK-APPLE-ARM64_32: add [[ARGS:x[0-9]+]], [[TMP:x[0-9]+]], #16
-; CHECK-APPLE-ARM64_32: strb [[ID]], [x0, #8]
 
 
 entry:
@@ -372,20 +1084,182 @@ entry:
 
 ; "caller4" calls "foo_vararg" that takes a swifterror parameter.
 define float @caller4(i8* %error_ref) {
-; CHECK-APPLE-LABEL: caller4:
+; CHECK-APPLE-AARCH64-LABEL: caller4:
+; CHECK-APPLE-AARCH64:       ; %bb.0: ; %entry
+; CHECK-APPLE-AARCH64-NEXT:    sub sp, sp, #96
+; CHECK-APPLE-AARCH64-NEXT:    stp x22, x21, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x20, x19, [sp, #64] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    add x29, sp, #80
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w21, -40
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w22, -48
+; CHECK-APPLE-AARCH64-NEXT:    mov x19, x0
+; CHECK-APPLE-AARCH64-NEXT:    mov w8, #10
+; CHECK-APPLE-AARCH64-NEXT:    mov w9, #11
+; CHECK-APPLE-AARCH64-NEXT:    mov w10, #12
+; CHECK-APPLE-AARCH64-NEXT:    stp w9, w8, [sp, #32]
+; CHECK-APPLE-AARCH64-NEXT:    str w10, [sp, #28]
+; CHECK-APPLE-AARCH64-NEXT:    mov x21, xzr
+; CHECK-APPLE-AARCH64-NEXT:    stp x9, x10, [sp, #8]
+; CHECK-APPLE-AARCH64-NEXT:    str x8, [sp]
+; CHECK-APPLE-AARCH64-NEXT:    bl _foo_vararg
+; CHECK-APPLE-AARCH64-NEXT:    mov x0, x21
+; CHECK-APPLE-AARCH64-NEXT:    cbnz x21, LBB8_2
+; CHECK-APPLE-AARCH64-NEXT:  ; %bb.1: ; %cont
+; CHECK-APPLE-AARCH64-NEXT:    ldrb w8, [x0, #8]
+; CHECK-APPLE-AARCH64-NEXT:    strb w8, [x19]
+; CHECK-APPLE-AARCH64-NEXT:  LBB8_2: ; %handler
+; CHECK-APPLE-AARCH64-NEXT:    bl _free
+; CHECK-APPLE-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-AARCH64-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp x20, x19, [sp, #64] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp x22, x21, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    add sp, sp, #96
+; CHECK-APPLE-AARCH64-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: caller4:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #112
+; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #80] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #96] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #96
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-AARCH64-NEXT:    ; implicit-def: $x1
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x21, xzr
+; CHECK-O0-AARCH64-NEXT:    mov w8, #10
+; CHECK-O0-AARCH64-NEXT:    stur w8, [x29, #-28]
+; CHECK-O0-AARCH64-NEXT:    mov w8, #11
+; CHECK-O0-AARCH64-NEXT:    stur w8, [x29, #-32]
+; CHECK-O0-AARCH64-NEXT:    mov w8, #12
+; CHECK-O0-AARCH64-NEXT:    stur w8, [x29, #-36]
+; CHECK-O0-AARCH64-NEXT:    ldur w8, [x29, #-28]
+; CHECK-O0-AARCH64-NEXT:    ; kill: def $x8 killed $w8
+; CHECK-O0-AARCH64-NEXT:    ldur w9, [x29, #-32]
+; CHECK-O0-AARCH64-NEXT:    mov w10, w9
+; CHECK-O0-AARCH64-NEXT:    ldur w9, [x29, #-36]
+; CHECK-O0-AARCH64-NEXT:    mov w11, w9
+; CHECK-O0-AARCH64-NEXT:    mov x9, sp
+; CHECK-O0-AARCH64-NEXT:    str x11, [x9, #16]
+; CHECK-O0-AARCH64-NEXT:    str x10, [x9, #8]
+; CHECK-O0-AARCH64-NEXT:    str x8, [x9]
+; CHECK-O0-AARCH64-NEXT:    bl _foo_vararg
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp, #40] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x0, x21
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #48] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    cbnz x21, LBB8_2
+; CHECK-O0-AARCH64-NEXT:  ; %bb.1: ; %cont
+; CHECK-O0-AARCH64-NEXT:    ldr x9, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x8, [sp, #40] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldrb w8, [x8, #8]
+; CHECK-O0-AARCH64-NEXT:    strb w8, [x9]
+; CHECK-O0-AARCH64-NEXT:  LBB8_2: ; %handler
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp, #48] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    bl _free
+; CHECK-O0-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #96] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x22, x21, [sp, #80] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #112
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-APPLE-ARM64_32-LABEL: caller4:
+; CHECK-APPLE-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-APPLE-ARM64_32-NEXT:    sub sp, sp, #80
+; CHECK-APPLE-ARM64_32-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    add x29, sp, #64
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w21, -40
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w22, -48
+; CHECK-APPLE-ARM64_32-NEXT:    mov x19, x0
+; CHECK-APPLE-ARM64_32-NEXT:    mov w8, #10
+; CHECK-APPLE-ARM64_32-NEXT:    mov w9, #11
+; CHECK-APPLE-ARM64_32-NEXT:    mov w10, #12
+; CHECK-APPLE-ARM64_32-NEXT:    stp w9, w8, [sp, #20]
+; CHECK-APPLE-ARM64_32-NEXT:    str w10, [sp, #16]
+; CHECK-APPLE-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-APPLE-ARM64_32-NEXT:    mov x9, #11
+; CHECK-APPLE-ARM64_32-NEXT:    movk x9, #12, lsl #32
+; CHECK-APPLE-ARM64_32-NEXT:    stur x9, [sp, #4]
+; CHECK-APPLE-ARM64_32-NEXT:    str w8, [sp]
+; CHECK-APPLE-ARM64_32-NEXT:    bl _foo_vararg
+; CHECK-APPLE-ARM64_32-NEXT:    mov x0, x21
+; CHECK-APPLE-ARM64_32-NEXT:    cbnz w0, LBB8_2
+; CHECK-APPLE-ARM64_32-NEXT:  ; %bb.1: ; %cont
+; CHECK-APPLE-ARM64_32-NEXT:    ldrb w8, [x0, #8]
+; CHECK-APPLE-ARM64_32-NEXT:    strb w8, [x19]
+; CHECK-APPLE-ARM64_32-NEXT:  LBB8_2: ; %handler
+; CHECK-APPLE-ARM64_32-NEXT:    bl _free
+; CHECK-APPLE-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    add sp, sp, #80
+; CHECK-APPLE-ARM64_32-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: caller4:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #96
+; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #64] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x1
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #10
+; CHECK-O0-ARM64_32-NEXT:    str w8, [sp, #56]
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #11
+; CHECK-O0-ARM64_32-NEXT:    str w8, [sp, #52]
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #12
+; CHECK-O0-ARM64_32-NEXT:    str w8, [sp, #48]
+; CHECK-O0-ARM64_32-NEXT:    ldr w8, [sp, #56]
+; CHECK-O0-ARM64_32-NEXT:    ldr w10, [sp, #52]
+; CHECK-O0-ARM64_32-NEXT:    ldr w11, [sp, #48]
+; CHECK-O0-ARM64_32-NEXT:    mov x9, sp
+; CHECK-O0-ARM64_32-NEXT:    str w11, [x9, #8]
+; CHECK-O0-ARM64_32-NEXT:    str w10, [x9, #4]
+; CHECK-O0-ARM64_32-NEXT:    str w8, [x9]
+; CHECK-O0-ARM64_32-NEXT:    bl _foo_vararg
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x0, x21
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #40] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    cmp x21, #0
+; CHECK-O0-ARM64_32-NEXT:    b.ne LBB8_2
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.1: ; %cont
+; CHECK-O0-ARM64_32-NEXT:    ldr x9, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x8, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldrb w8, [x8, #8]
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $w0 killed $w8
+; CHECK-O0-ARM64_32-NEXT:    strb w8, [x9]
+; CHECK-O0-ARM64_32-NEXT:  LBB8_2: ; %handler
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #40] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    bl _free
+; CHECK-O0-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldp x22, x21, [sp, #64] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #96
+; CHECK-O0-ARM64_32-NEXT:    ret
 
-; CHECK-APPLE-AARCH64: mov [[ID:x[0-9]+]], x0
-; CHECK-APPLE-AARCH64: mov x21, xzr
-; CHECK-APPLE-AARCH64: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
-; CHECK-APPLE-AARCH64: str {{x[0-9]+}}, [sp]
 
-; CHECK-APPLE-AARCH64: bl {{.*}}foo_vararg
-; CHECK-APPLE-AARCH64: mov x0, x21
-; CHECK-APPLE-AARCH64: cbnz x21
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE-AARCH64: ldrb [[CODE:w[0-9]+]], [x0, #8]
-; CHECK-APPLE-AARCH64: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE-AARCH64: bl {{.*}}free
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr_ref
@@ -418,91 +1292,345 @@ handler:
 
 ; Check that we don't blow up on tail calling swifterror argument functions.
 define float @tailcallswifterror(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-APPLE-LABEL: tailcallswifterror:
+; CHECK-APPLE:       ; %bb.0: ; %entry
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov x29, sp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    bl _tailcallswifterror
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: tailcallswifterror:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x29, sp
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    bl _tailcallswifterror
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: tailcallswifterror:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #-16]! ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
+; CHECK-O0-ARM64_32-NEXT:    bl _tailcallswifterror
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp], #16 ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   %0 = tail call float @tailcallswifterror(%swift_error** swifterror %error_ptr_ref)
   ret float %0
 }
 define swiftcc float @tailcallswifterror_swiftcc(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-APPLE-LABEL: tailcallswifterror_swiftcc:
+; CHECK-APPLE:       ; %bb.0: ; %entry
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov x29, sp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    bl _tailcallswifterror_swiftcc
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: tailcallswifterror_swiftcc:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x29, sp
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    bl _tailcallswifterror_swiftcc
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: tailcallswifterror_swiftcc:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #-16]! ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
+; CHECK-O0-ARM64_32-NEXT:    bl _tailcallswifterror_swiftcc
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp], #16 ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   %0 = tail call swiftcc float @tailcallswifterror_swiftcc(%swift_error** swifterror %error_ptr_ref)
   ret float %0
 }
 
-; CHECK-APPLE-LABEL: swifterror_clobber
-; CHECK-APPLE: mov [[REG:x[0-9]+]], x21
-; CHECK-APPLE: nop
-; CHECK-APPLE: mov x21, [[REG]]
 define swiftcc void @swifterror_clobber(%swift_error** nocapture swifterror %err) {
+; CHECK-APPLE-LABEL: swifterror_clobber:
+; CHECK-APPLE:       ; %bb.0:
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov x29, sp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    mov x8, x21
+; CHECK-APPLE-NEXT:    ; InlineAsm Start
+; CHECK-APPLE-NEXT:    nop
+; CHECK-APPLE-NEXT:    ; InlineAsm End
+; CHECK-APPLE-NEXT:    mov x21, x8
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: swifterror_clobber:
+; CHECK-O0-AARCH64:       ; %bb.0:
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #32
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #16
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    ; InlineAsm Start
+; CHECK-O0-AARCH64-NEXT:    nop
+; CHECK-O0-AARCH64-NEXT:    ; InlineAsm End
+; CHECK-O0-AARCH64-NEXT:    ldr x21, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #32
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: swifterror_clobber:
+; CHECK-O0-ARM64_32:       ; %bb.0:
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    ; InlineAsm Start
+; CHECK-O0-ARM64_32-NEXT:    nop
+; CHECK-O0-ARM64_32-NEXT:    ; InlineAsm End
+; CHECK-O0-ARM64_32-NEXT:    ldr x21, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #16
+; CHECK-O0-ARM64_32-NEXT:    ret
   call void asm sideeffect "nop", "~{x21}"()
   ret void
 }
 
-; CHECK-APPLE-LABEL: swifterror_reg_clobber
-; CHECK-APPLE: stp {{.*}}x21
-; CHECK-APPLE: nop
-; CHECK-APPLE: ldp  {{.*}}x21
 define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) {
+; CHECK-APPLE-LABEL: swifterror_reg_clobber:
+; CHECK-APPLE:       ; %bb.0:
+; CHECK-APPLE-NEXT:    stp x22, x21, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    add x29, sp, #16
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    .cfi_offset w21, -24
+; CHECK-APPLE-NEXT:    .cfi_offset w22, -32
+; CHECK-APPLE-NEXT:    ; InlineAsm Start
+; CHECK-APPLE-NEXT:    nop
+; CHECK-APPLE-NEXT:    ; InlineAsm End
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x22, x21, [sp], #32 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: swifterror_reg_clobber:
+; CHECK-O0-AARCH64:       ; %bb.0:
+; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #16
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-AARCH64-NEXT:    ; InlineAsm Start
+; CHECK-O0-AARCH64-NEXT:    nop
+; CHECK-O0-AARCH64-NEXT:    ; InlineAsm End
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x22, x21, [sp], #32 ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: swifterror_reg_clobber:
+; CHECK-O0-ARM64_32:       ; %bb.0:
+; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -8
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w22, -16
+; CHECK-O0-ARM64_32-NEXT:    ; InlineAsm Start
+; CHECK-O0-ARM64_32-NEXT:    nop
+; CHECK-O0-ARM64_32-NEXT:    ; InlineAsm End
+; CHECK-O0-ARM64_32-NEXT:    ldp x22, x21, [sp], #16 ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ret
   call void asm sideeffect "nop", "~{x21}"()
   ret void
 }
-; CHECK-APPLE-LABEL: params_in_reg
-; Save callee saved registers and swifterror since it will be clobbered by the first call to params_in_reg2.
-; CHECK-APPLE:  stp     x21, x28, [sp
-; CHECK-APPLE:  stp     x27, x26, [sp
-; CHECK-APPLE:  stp     x25, x24, [sp
-; CHECK-APPLE:  stp     x23, x22, [sp
-; CHECK-APPLE:  stp     x20, x19, [sp
-; CHECK-APPLE:  stp     x29, x30, [sp
-; CHECK-APPLE:  str     x20, [sp
-; Store argument registers.
-; CHECK-APPLE:  mov      x23, x7
-; CHECK-APPLE:  mov      x24, x6
-; CHECK-APPLE:  mov      x25, x5
-; CHECK-APPLE:  mov      x26, x4
-; CHECK-APPLE:  mov      x27, x3
-; CHECK-APPLE:  mov      x28, x2
-; CHECK-APPLE:  mov      x19, x1
-; CHECK-APPLE:  mov      x22, x0
-; Setup call.
-; CHECK-APPLE:  mov     w0, #1
-; CHECK-APPLE:  mov     w1, #2
-; CHECK-APPLE:  mov     w2, #3
-; CHECK-APPLE:  mov     w3, #4
-; CHECK-APPLE:  mov     w4, #5
-; CHECK-APPLE:  mov     w5, #6
-; CHECK-APPLE:  mov     w6, #7
-; CHECK-APPLE:  mov     w7, #8
-; CHECK-APPLE:  mov      x20, xzr
-; CHECK-APPLE:  mov      x21, xzr
-; CHECK-APPLE:  bl      _params_in_reg2
-; Restore original arguments for next call.
-; CHECK-APPLE:  mov      x0, x22
-; CHECK-APPLE:  mov      x1, x19
-; CHECK-APPLE:  mov      x2, x28
-; CHECK-APPLE:  mov      x3, x27
-; CHECK-APPLE:  mov      x4, x26
-; CHECK-APPLE:  mov      x5, x25
-; CHECK-APPLE:  mov      x6, x24
-; CHECK-APPLE:  mov      x7, x23
-; Restore original swiftself argument and swifterror %err.
-; CHECK-APPLE:  ldp             x20, x21, [sp
-; CHECK-APPLE:  bl      _params_in_reg2
-; Restore calle save registers but don't clober swifterror x21.
-; CHECK-APPLE-NOT: x21
-; CHECK-APPLE:  ldp     x29, x30, [sp
-; CHECK-APPLE-NOT: x21
-; CHECK-APPLE:  ldp     x20, x19, [sp
-; CHECK-APPLE-NOT: x21
-; CHECK-APPLE:  ldp     x23, x22, [sp
-; CHECK-APPLE-NOT: x21
-; CHECK-APPLE:  ldp     x25, x24, [sp
-; CHECK-APPLE-NOT: x21
-; CHECK-APPLE:  ldp     x27, x26, [sp
-; CHECK-APPLE-NOT: x21
-; CHECK-APPLE:  ldr     x28, [sp
-; CHECK-APPLE-NOT: x21
-; CHECK-APPLE:  ret
+
 define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err) {
+; CHECK-APPLE-LABEL: params_in_reg:
+; CHECK-APPLE:       ; %bb.0:
+; CHECK-APPLE-NEXT:    sub sp, sp, #112
+; CHECK-APPLE-NEXT:    stp x21, x28, [sp, #8] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x27, x26, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x25, x24, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x23, x22, [sp, #64] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x20, x19, [sp, #80] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #96] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    add x29, sp, #96
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-NEXT:    .cfi_offset w22, -40
+; CHECK-APPLE-NEXT:    .cfi_offset w23, -48
+; CHECK-APPLE-NEXT:    .cfi_offset w24, -56
+; CHECK-APPLE-NEXT:    .cfi_offset w25, -64
+; CHECK-APPLE-NEXT:    .cfi_offset w26, -72
+; CHECK-APPLE-NEXT:    .cfi_offset w27, -80
+; CHECK-APPLE-NEXT:    .cfi_offset w28, -96
+; CHECK-APPLE-NEXT:    str x20, [sp] ; 8-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov x23, x7
+; CHECK-APPLE-NEXT:    mov x24, x6
+; CHECK-APPLE-NEXT:    mov x25, x5
+; CHECK-APPLE-NEXT:    mov x26, x4
+; CHECK-APPLE-NEXT:    mov x27, x3
+; CHECK-APPLE-NEXT:    mov x28, x2
+; CHECK-APPLE-NEXT:    mov x19, x1
+; CHECK-APPLE-NEXT:    mov x22, x0
+; CHECK-APPLE-NEXT:    mov w0, #1
+; CHECK-APPLE-NEXT:    mov w1, #2
+; CHECK-APPLE-NEXT:    mov w2, #3
+; CHECK-APPLE-NEXT:    mov w3, #4
+; CHECK-APPLE-NEXT:    mov w4, #5
+; CHECK-APPLE-NEXT:    mov w5, #6
+; CHECK-APPLE-NEXT:    mov w6, #7
+; CHECK-APPLE-NEXT:    mov w7, #8
+; CHECK-APPLE-NEXT:    mov x20, xzr
+; CHECK-APPLE-NEXT:    mov x21, xzr
+; CHECK-APPLE-NEXT:    bl _params_in_reg2
+; CHECK-APPLE-NEXT:    mov x0, x22
+; CHECK-APPLE-NEXT:    mov x1, x19
+; CHECK-APPLE-NEXT:    mov x2, x28
+; CHECK-APPLE-NEXT:    mov x3, x27
+; CHECK-APPLE-NEXT:    mov x4, x26
+; CHECK-APPLE-NEXT:    mov x5, x25
+; CHECK-APPLE-NEXT:    mov x6, x24
+; CHECK-APPLE-NEXT:    mov x7, x23
+; CHECK-APPLE-NEXT:    ldp x20, x21, [sp] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    bl _params_in_reg2
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #96] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x20, x19, [sp, #80] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x23, x22, [sp, #64] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x25, x24, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x27, x26, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldr x28, [sp, #16] ; 8-byte Folded Reload
+; CHECK-APPLE-NEXT:    add sp, sp, #112
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: params_in_reg:
+; CHECK-O0-AARCH64:       ; %bb.0:
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #128
+; CHECK-O0-AARCH64-NEXT:    str x20, [sp, #96] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #112] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #112
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w20, -32
+; CHECK-O0-AARCH64-NEXT:    stur x21, [x29, #-32] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x20, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stur x7, [x29, #-40] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stur x6, [x29, #-48] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x5, [sp, #56] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x4, [sp, #48] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x3, [sp, #40] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x2, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x1, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    ; implicit-def: $x0
+; CHECK-O0-AARCH64-NEXT:    mov x20, xzr
+; CHECK-O0-AARCH64-NEXT:    mov x21, x20
+; CHECK-O0-AARCH64-NEXT:    mov w8, #1
+; CHECK-O0-AARCH64-NEXT:    mov w0, w8
+; CHECK-O0-AARCH64-NEXT:    mov w8, #2
+; CHECK-O0-AARCH64-NEXT:    mov w1, w8
+; CHECK-O0-AARCH64-NEXT:    mov w8, #3
+; CHECK-O0-AARCH64-NEXT:    mov w2, w8
+; CHECK-O0-AARCH64-NEXT:    mov w8, #4
+; CHECK-O0-AARCH64-NEXT:    mov w3, w8
+; CHECK-O0-AARCH64-NEXT:    mov w8, #5
+; CHECK-O0-AARCH64-NEXT:    mov w4, w8
+; CHECK-O0-AARCH64-NEXT:    mov w8, #6
+; CHECK-O0-AARCH64-NEXT:    mov w5, w8
+; CHECK-O0-AARCH64-NEXT:    mov w8, #7
+; CHECK-O0-AARCH64-NEXT:    mov w6, w8
+; CHECK-O0-AARCH64-NEXT:    mov w8, #8
+; CHECK-O0-AARCH64-NEXT:    mov w7, w8
+; CHECK-O0-AARCH64-NEXT:    bl _params_in_reg2
+; CHECK-O0-AARCH64-NEXT:    ldr x20, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x1, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x2, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x3, [sp, #40] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x4, [sp, #48] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x5, [sp, #56] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x6, [x29, #-48] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x7, [x29, #-40] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    mov x8, x21
+; CHECK-O0-AARCH64-NEXT:    ldur x21, [x29, #-32] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    bl _params_in_reg2
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #112] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x20, [sp, #96] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #128
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: params_in_reg:
+; CHECK-O0-ARM64_32:       ; %bb.0:
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #112
+; CHECK-O0-ARM64_32-NEXT:    stp x20, x30, [sp, #96] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w20, -16
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #80] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x20, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x7, [sp, #72] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x6, [sp, #64] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x5, [sp, #56] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x4, [sp, #48] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x3, [sp, #40] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x2, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x1, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x0
+; CHECK-O0-ARM64_32-NEXT:    mov x20, xzr
+; CHECK-O0-ARM64_32-NEXT:    mov x21, x20
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #1
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w8
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #2
+; CHECK-O0-ARM64_32-NEXT:    mov w1, w8
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #3
+; CHECK-O0-ARM64_32-NEXT:    mov w2, w8
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #4
+; CHECK-O0-ARM64_32-NEXT:    mov w3, w8
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #5
+; CHECK-O0-ARM64_32-NEXT:    mov w4, w8
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #6
+; CHECK-O0-ARM64_32-NEXT:    mov w5, w8
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #7
+; CHECK-O0-ARM64_32-NEXT:    mov w6, w8
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #8
+; CHECK-O0-ARM64_32-NEXT:    mov w7, w8
+; CHECK-O0-ARM64_32-NEXT:    bl _params_in_reg2
+; CHECK-O0-ARM64_32-NEXT:    ldr x20, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x1, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x2, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x3, [sp, #40] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x4, [sp, #48] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x5, [sp, #56] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x6, [sp, #64] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x7, [sp, #72] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x21
+; CHECK-O0-ARM64_32-NEXT:    ldr x21, [sp, #80] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    bl _params_in_reg2
+; CHECK-O0-ARM64_32-NEXT:    ldp x20, x30, [sp, #96] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #112
+; CHECK-O0-ARM64_32-NEXT:    ret
   %error_ptr_ref = alloca swifterror %swift_error*, align 8
   store %swift_error* null, %swift_error** %error_ptr_ref
   call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i8* swiftself null, %swift_error** nocapture swifterror %error_ptr_ref)
@@ -511,91 +1639,308 @@ define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* s
 }
 declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err)
 
-; CHECK-APPLE-LABEL: params_and_return_in_reg
-; Store callee saved registers.
-; CHECK-APPLE:  stp     x20, x28, [sp, #24
-; CHECK-APPLE:  stp     x27, x26, [sp
-; CHECK-APPLE:  stp     x25, x24, [sp
-; CHECK-APPLE:  stp     x23, x22, [sp
-; CHECK-APPLE:  stp     x20, x19, [sp
-; CHECK-APPLE:  stp     x29, x30, [sp
-; Save original arguments.
-; CHECK-APPLE:  mov      x23, x21
-; CHECK-APPLE:  str     x7, [sp, #16]
-; CHECK-APPLE:  mov      x24, x6
-; CHECK-APPLE:  mov      x25, x5
-; CHECK-APPLE:  mov      x26, x4
-; CHECK-APPLE:  mov      x27, x3
-; CHECK-APPLE:  mov      x28, x2
-; CHECK-APPLE:  mov      x19, x1
-; CHECK-APPLE:  mov      x22, x0
-; Setup call arguments.
-; CHECK-APPLE:  mov     w0, #1
-; CHECK-APPLE:  mov     w1, #2
-; CHECK-APPLE:  mov     w2, #3
-; CHECK-APPLE:  mov     w3, #4
-; CHECK-APPLE:  mov     w4, #5
-; CHECK-APPLE:  mov     w5, #6
-; CHECK-APPLE:  mov     w6, #7
-; CHECK-APPLE:  mov     w7, #8
-; CHECK-APPLE:  mov      x20, xzr
-; CHECK-APPLE:  mov      x21, xzr
-; CHECK-APPLE:  bl      _params_in_reg2
-; Store swifterror %error_ptr_ref.
-; CHECK-APPLE:  str     x21, [sp, #8]
-; Setup call arguments from original arguments.
-; CHECK-APPLE:  mov      x0, x22
-; CHECK-APPLE:  mov      x1, x19
-; CHECK-APPLE:  mov      x2, x28
-; CHECK-APPLE:  mov      x3, x27
-; CHECK-APPLE:  mov      x4, x26
-; CHECK-APPLE:  mov      x5, x25
-; CHECK-APPLE:  mov      x6, x24
-; CHECK-APPLE:  ldp     x7, x20, [sp, #16]
-; CHECK-APPLE:  mov      x21, x23
-; CHECK-APPLE:  bl      _params_and_return_in_reg2
-; Store return values.
-; CHECK-APPLE:  mov      x19, x0
-; CHECK-APPLE:  mov      x22, x1
-; CHECK-APPLE:  mov      x24, x2
-; CHECK-APPLE:  mov      x25, x3
-; CHECK-APPLE:  mov      x26, x4
-; CHECK-APPLE:  mov      x27, x5
-; CHECK-APPLE:  mov      x28, x6
-; CHECK-APPLE:  mov      x23, x7
-; Save swifterror %err.
-; CHECK-APPLE:  str     x21, [sp, #24]
-; Setup call.
-; CHECK-APPLE:  mov     w0, #1
-; CHECK-APPLE:  mov     w1, #2
-; CHECK-APPLE:  mov     w2, #3
-; CHECK-APPLE:  mov     w3, #4
-; CHECK-APPLE:  mov     w4, #5
-; CHECK-APPLE:  mov     w5, #6
-; CHECK-APPLE:  mov     w6, #7
-; CHECK-APPLE:  mov     w7, #8
-; CHECK-APPLE:  mov     x20, xzr
-; ... setup call with swiferror %error_ptr_ref.
-; CHECK-APPLE:  ldr     x21, [sp, #8]
-; CHECK-APPLE:  bl      _params_in_reg2
-; Restore return values for return from this function.
-; CHECK-APPLE:  mov      x0, x19
-; CHECK-APPLE:  mov      x1, x22
-; CHECK-APPLE:  mov      x2, x24
-; CHECK-APPLE:  mov      x3, x25
-; CHECK-APPLE:  mov      x4, x26
-; CHECK-APPLE:  mov      x5, x27
-; CHECK-APPLE:  mov      x6, x28
-; CHECK-APPLE:  mov      x7, x23
-; Restore swifterror %err and callee save registers.
-; CHECK-APPLE:  ldp     x21, x28, [sp, #24
-; CHECK-APPLE:  ldp     x29, x30, [sp
-; CHECK-APPLE:  ldp     x20, x19, [sp
-; CHECK-APPLE:  ldp     x23, x22, [sp
-; CHECK-APPLE:  ldp     x25, x24, [sp
-; CHECK-APPLE:  ldp     x27, x26, [sp
-; CHECK-APPLE:  ret
 define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err) {
+; CHECK-APPLE-LABEL: params_and_return_in_reg:
+; CHECK-APPLE:       ; %bb.0:
+; CHECK-APPLE-NEXT:    sub sp, sp, #128
+; CHECK-APPLE-NEXT:    stp x20, x28, [sp, #24] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x27, x26, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x25, x24, [sp, #64] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x23, x22, [sp, #80] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x20, x19, [sp, #96] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #112] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    add x29, sp, #112
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-NEXT:    .cfi_offset w22, -40
+; CHECK-APPLE-NEXT:    .cfi_offset w23, -48
+; CHECK-APPLE-NEXT:    .cfi_offset w24, -56
+; CHECK-APPLE-NEXT:    .cfi_offset w25, -64
+; CHECK-APPLE-NEXT:    .cfi_offset w26, -72
+; CHECK-APPLE-NEXT:    .cfi_offset w27, -80
+; CHECK-APPLE-NEXT:    .cfi_offset w28, -96
+; CHECK-APPLE-NEXT:    mov x23, x21
+; CHECK-APPLE-NEXT:    str x7, [sp, #16] ; 8-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov x24, x6
+; CHECK-APPLE-NEXT:    mov x25, x5
+; CHECK-APPLE-NEXT:    mov x26, x4
+; CHECK-APPLE-NEXT:    mov x27, x3
+; CHECK-APPLE-NEXT:    mov x28, x2
+; CHECK-APPLE-NEXT:    mov x19, x1
+; CHECK-APPLE-NEXT:    mov x22, x0
+; CHECK-APPLE-NEXT:    mov w0, #1
+; CHECK-APPLE-NEXT:    mov w1, #2
+; CHECK-APPLE-NEXT:    mov w2, #3
+; CHECK-APPLE-NEXT:    mov w3, #4
+; CHECK-APPLE-NEXT:    mov w4, #5
+; CHECK-APPLE-NEXT:    mov w5, #6
+; CHECK-APPLE-NEXT:    mov w6, #7
+; CHECK-APPLE-NEXT:    mov w7, #8
+; CHECK-APPLE-NEXT:    mov x20, xzr
+; CHECK-APPLE-NEXT:    mov x21, xzr
+; CHECK-APPLE-NEXT:    bl _params_in_reg2
+; CHECK-APPLE-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov x0, x22
+; CHECK-APPLE-NEXT:    mov x1, x19
+; CHECK-APPLE-NEXT:    mov x2, x28
+; CHECK-APPLE-NEXT:    mov x3, x27
+; CHECK-APPLE-NEXT:    mov x4, x26
+; CHECK-APPLE-NEXT:    mov x5, x25
+; CHECK-APPLE-NEXT:    mov x6, x24
+; CHECK-APPLE-NEXT:    ldp x7, x20, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    mov x21, x23
+; CHECK-APPLE-NEXT:    bl _params_and_return_in_reg2
+; CHECK-APPLE-NEXT:    mov x19, x0
+; CHECK-APPLE-NEXT:    mov x22, x1
+; CHECK-APPLE-NEXT:    mov x24, x2
+; CHECK-APPLE-NEXT:    mov x25, x3
+; CHECK-APPLE-NEXT:    mov x26, x4
+; CHECK-APPLE-NEXT:    mov x27, x5
+; CHECK-APPLE-NEXT:    mov x28, x6
+; CHECK-APPLE-NEXT:    mov x23, x7
+; CHECK-APPLE-NEXT:    str x21, [sp, #24] ; 8-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov w0, #1
+; CHECK-APPLE-NEXT:    mov w1, #2
+; CHECK-APPLE-NEXT:    mov w2, #3
+; CHECK-APPLE-NEXT:    mov w3, #4
+; CHECK-APPLE-NEXT:    mov w4, #5
+; CHECK-APPLE-NEXT:    mov w5, #6
+; CHECK-APPLE-NEXT:    mov w6, #7
+; CHECK-APPLE-NEXT:    mov w7, #8
+; CHECK-APPLE-NEXT:    mov x20, xzr
+; CHECK-APPLE-NEXT:    ldr x21, [sp, #8] ; 8-byte Folded Reload
+; CHECK-APPLE-NEXT:    bl _params_in_reg2
+; CHECK-APPLE-NEXT:    mov x0, x19
+; CHECK-APPLE-NEXT:    mov x1, x22
+; CHECK-APPLE-NEXT:    mov x2, x24
+; CHECK-APPLE-NEXT:    mov x3, x25
+; CHECK-APPLE-NEXT:    mov x4, x26
+; CHECK-APPLE-NEXT:    mov x5, x27
+; CHECK-APPLE-NEXT:    mov x6, x28
+; CHECK-APPLE-NEXT:    mov x7, x23
+; CHECK-APPLE-NEXT:    ldp x21, x28, [sp, #24] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #112] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x20, x19, [sp, #96] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x23, x22, [sp, #80] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x25, x24, [sp, #64] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x27, x26, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    add sp, sp, #128
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: params_and_return_in_reg:
+; CHECK-O0-AARCH64:       ; %bb.0:
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #272
+; CHECK-O0-AARCH64-NEXT:    stp x28, x20, [sp, #240] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #256] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #256
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w20, -24
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w28, -32
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp, #72] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x20, [sp] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x7, [sp, #64] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x6, [sp, #56] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x5, [sp, #48] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x4, [sp, #40] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x3, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x2, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x1, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    ; implicit-def: $x0
+; CHECK-O0-AARCH64-NEXT:    mov x20, xzr
+; CHECK-O0-AARCH64-NEXT:    str x20, [sp, #80] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x21, x20
+; CHECK-O0-AARCH64-NEXT:    mov w8, #1
+; CHECK-O0-AARCH64-NEXT:    mov w0, w8
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #88] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov w8, #2
+; CHECK-O0-AARCH64-NEXT:    mov w1, w8
+; CHECK-O0-AARCH64-NEXT:    str x1, [sp, #96] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov w8, #3
+; CHECK-O0-AARCH64-NEXT:    mov w2, w8
+; CHECK-O0-AARCH64-NEXT:    str x2, [sp, #104] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov w8, #4
+; CHECK-O0-AARCH64-NEXT:    mov w3, w8
+; CHECK-O0-AARCH64-NEXT:    str x3, [sp, #112] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov w8, #5
+; CHECK-O0-AARCH64-NEXT:    mov w4, w8
+; CHECK-O0-AARCH64-NEXT:    str x4, [sp, #120] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov w8, #6
+; CHECK-O0-AARCH64-NEXT:    mov w5, w8
+; CHECK-O0-AARCH64-NEXT:    str x5, [sp, #128] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov w8, #7
+; CHECK-O0-AARCH64-NEXT:    mov w6, w8
+; CHECK-O0-AARCH64-NEXT:    stur x6, [x29, #-120] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov w8, #8
+; CHECK-O0-AARCH64-NEXT:    mov w7, w8
+; CHECK-O0-AARCH64-NEXT:    stur x7, [x29, #-112] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    bl _params_in_reg2
+; CHECK-O0-AARCH64-NEXT:    ldr x20, [sp] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x1, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x2, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x3, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x4, [sp, #40] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x5, [sp, #48] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x6, [sp, #56] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x7, [sp, #64] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    mov x8, x21
+; CHECK-O0-AARCH64-NEXT:    ldr x21, [sp, #72] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-104] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    bl _params_and_return_in_reg2
+; CHECK-O0-AARCH64-NEXT:    ldr x20, [sp, #80] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    mov x8, x0
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp, #88] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-96] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x8, x1
+; CHECK-O0-AARCH64-NEXT:    ldr x1, [sp, #96] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-88] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x8, x2
+; CHECK-O0-AARCH64-NEXT:    ldr x2, [sp, #104] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-80] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x8, x3
+; CHECK-O0-AARCH64-NEXT:    ldr x3, [sp, #112] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-72] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x8, x4
+; CHECK-O0-AARCH64-NEXT:    ldr x4, [sp, #120] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-64] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x8, x5
+; CHECK-O0-AARCH64-NEXT:    ldr x5, [sp, #128] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-56] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x8, x6
+; CHECK-O0-AARCH64-NEXT:    ldur x6, [x29, #-120] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-48] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x8, x7
+; CHECK-O0-AARCH64-NEXT:    ldur x7, [x29, #-112] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-40] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x8, x21
+; CHECK-O0-AARCH64-NEXT:    ldur x21, [x29, #-104] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-32] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    bl _params_in_reg2
+; CHECK-O0-AARCH64-NEXT:    ldur x0, [x29, #-96] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x1, [x29, #-88] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x2, [x29, #-80] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x3, [x29, #-72] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x4, [x29, #-64] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x5, [x29, #-56] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x6, [x29, #-48] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x7, [x29, #-40] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    mov x8, x21
+; CHECK-O0-AARCH64-NEXT:    ldur x21, [x29, #-32] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #256] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x28, x20, [sp, #240] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #272
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: params_and_return_in_reg:
+; CHECK-O0-ARM64_32:       ; %bb.0:
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #272
+; CHECK-O0-ARM64_32-NEXT:    str x28, [sp, #240] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    stp x20, x30, [sp, #256] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 272
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w20, -16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w28, -32
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #72] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x20, [sp] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x7, [sp, #64] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x6, [sp, #56] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x5, [sp, #48] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x4, [sp, #40] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x3, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x2, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x1, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x0
+; CHECK-O0-ARM64_32-NEXT:    mov x20, xzr
+; CHECK-O0-ARM64_32-NEXT:    str x20, [sp, #80] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x21, x20
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #1
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w8
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #88] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #2
+; CHECK-O0-ARM64_32-NEXT:    mov w1, w8
+; CHECK-O0-ARM64_32-NEXT:    str x1, [sp, #96] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #3
+; CHECK-O0-ARM64_32-NEXT:    mov w2, w8
+; CHECK-O0-ARM64_32-NEXT:    str x2, [sp, #104] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #4
+; CHECK-O0-ARM64_32-NEXT:    mov w3, w8
+; CHECK-O0-ARM64_32-NEXT:    str x3, [sp, #112] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #5
+; CHECK-O0-ARM64_32-NEXT:    mov w4, w8
+; CHECK-O0-ARM64_32-NEXT:    str x4, [sp, #120] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #6
+; CHECK-O0-ARM64_32-NEXT:    mov w5, w8
+; CHECK-O0-ARM64_32-NEXT:    str x5, [sp, #128] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #7
+; CHECK-O0-ARM64_32-NEXT:    mov w6, w8
+; CHECK-O0-ARM64_32-NEXT:    str x6, [sp, #136] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #8
+; CHECK-O0-ARM64_32-NEXT:    mov w7, w8
+; CHECK-O0-ARM64_32-NEXT:    str x7, [sp, #144] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    bl _params_in_reg2
+; CHECK-O0-ARM64_32-NEXT:    ldr x20, [sp] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x1, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x2, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x3, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x4, [sp, #40] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x5, [sp, #48] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x6, [sp, #56] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x7, [sp, #64] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x21
+; CHECK-O0-ARM64_32-NEXT:    ldr x21, [sp, #72] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #152] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    bl _params_and_return_in_reg2
+; CHECK-O0-ARM64_32-NEXT:    ldr x20, [sp, #80] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x0
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #88] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #160] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x1
+; CHECK-O0-ARM64_32-NEXT:    ldr x1, [sp, #96] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #168] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x2
+; CHECK-O0-ARM64_32-NEXT:    ldr x2, [sp, #104] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #176] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x3
+; CHECK-O0-ARM64_32-NEXT:    ldr x3, [sp, #112] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #184] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x4
+; CHECK-O0-ARM64_32-NEXT:    ldr x4, [sp, #120] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #192] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x5
+; CHECK-O0-ARM64_32-NEXT:    ldr x5, [sp, #128] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #200] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x6
+; CHECK-O0-ARM64_32-NEXT:    ldr x6, [sp, #136] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #208] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x7
+; CHECK-O0-ARM64_32-NEXT:    ldr x7, [sp, #144] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #216] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x21
+; CHECK-O0-ARM64_32-NEXT:    ldr x21, [sp, #152] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #224] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    bl _params_in_reg2
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #160] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x1, [sp, #168] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x2, [sp, #176] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x3, [sp, #184] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x4, [sp, #192] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x5, [sp, #200] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x6, [sp, #208] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x7, [sp, #216] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x21
+; CHECK-O0-ARM64_32-NEXT:    ldr x21, [sp, #224] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldp x20, x30, [sp, #256] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x28, [sp, #240] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #272
+; CHECK-O0-ARM64_32-NEXT:    ret
   %error_ptr_ref = alloca swifterror %swift_error*, align 8
   store %swift_error* null, %swift_error** %error_ptr_ref
   call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i8* swiftself null, %swift_error** nocapture swifterror %error_ptr_ref)
@@ -610,11 +1955,53 @@ declare void @acallee(i8*)
 
 ; Make sure we don't tail call if the caller returns a swifterror value. We
 ; would have to move into the swifterror register before the tail call.
-; CHECK-APPLE: tailcall_from_swifterror:
-; CHECK-APPLE-NOT: b _acallee
-; CHECK-APPLE: bl _acallee
-
 define swiftcc void @tailcall_from_swifterror(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-APPLE-LABEL: tailcall_from_swifterror:
+; CHECK-APPLE:       ; %bb.0: ; %entry
+; CHECK-APPLE-NEXT:    str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    add x29, sp, #16
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    .cfi_offset w19, -32
+; CHECK-APPLE-NEXT:    mov x19, x21
+; CHECK-APPLE-NEXT:    mov x0, xzr
+; CHECK-APPLE-NEXT:    bl _acallee
+; CHECK-APPLE-NEXT:    mov x21, x19
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: tailcall_from_swifterror:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #32
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #16
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x0, xzr
+; CHECK-O0-AARCH64-NEXT:    bl _acallee
+; CHECK-O0-AARCH64-NEXT:    ldr x21, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #32
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: tailcall_from_swifterror:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #32
+; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x0, xzr
+; CHECK-O0-ARM64_32-NEXT:    bl _acallee
+; CHECK-O0-ARM64_32-NEXT:    ldr x21, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #32
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   tail call void @acallee(i8* null)
   ret void
@@ -623,18 +2010,69 @@ entry:
 declare swiftcc void @foo2(%swift_error** swifterror)
 
 ; Make sure we properly assign registers during fast-isel.
-; CHECK-O0-LABEL: testAssign
-; CHECK-O0: mov     x21, xzr
-; CHECK-O0: bl      _foo2
-; CHECK-O0: str     x21, [s[[STK:.*]]]
-; CHECK-O0: ldr x{{[0-9]+}}, [s[[STK]]]
-
-; CHECK-APPLE-LABEL: testAssign
-; CHECK-APPLE: mov      x21, xzr
-; CHECK-APPLE: bl      _foo2
-; CHECK-APPLE: mov      x0, x21
-
 define swiftcc %swift_error* @testAssign(i8* %error_ref) {
+; CHECK-APPLE-LABEL: testAssign:
+; CHECK-APPLE:       ; %bb.0: ; %entry
+; CHECK-APPLE-NEXT:    sub sp, sp, #48
+; CHECK-APPLE-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    add x29, sp, #32
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    .cfi_offset w21, -24
+; CHECK-APPLE-NEXT:    .cfi_offset w22, -32
+; CHECK-APPLE-NEXT:    mov x21, xzr
+; CHECK-APPLE-NEXT:    bl _foo2
+; CHECK-APPLE-NEXT:    mov x0, x21
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x22, x21, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    add sp, sp, #48
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: testAssign:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #48
+; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #32
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-AARCH64-NEXT:    ; implicit-def: $x1
+; CHECK-O0-AARCH64-NEXT:    mov x21, xzr
+; CHECK-O0-AARCH64-NEXT:    bl _foo2
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:  ; %bb.1: ; %a
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x22, x21, [sp, #16] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #48
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: testAssign:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #48
+; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x1
+; CHECK-O0-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-O0-ARM64_32-NEXT:    bl _foo2
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.1: ; %a
+; CHECK-O0-ARM64_32-NEXT:    ldr x8, [sp] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    and x0, x8, #0xffffffff
+; CHECK-O0-ARM64_32-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldp x22, x21, [sp, #16] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #48
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   %error_ptr = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr

diff  --git a/llvm/test/CodeGen/ARM/swifterror.ll b/llvm/test/CodeGen/ARM/swifterror.ll
index ba866dc8e1fcf..97a95f20551c5 100644
--- a/llvm/test/CodeGen/ARM/swifterror.ll
+++ b/llvm/test/CodeGen/ARM/swifterror.ll
@@ -1,4 +1,5 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=armv7-apple-ios | FileCheck --check-prefix=CHECK-APPLE --check-prefix=CHECK-ARMV7 %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s -mtriple=armv7-apple-ios | FileCheck --check-prefix=CHECK-APPLE %s
 ; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=armv7-apple-ios | FileCheck --check-prefix=CHECK-O0 %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=armv7-linux-androideabi | FileCheck --check-prefix=CHECK-ANDROID %s
 
@@ -11,19 +12,45 @@ declare void @free(i8*)
 ; that takes a swifterror parameter and "caller" is the caller of "foo".
 define float @foo(%swift_error** swifterror %error_ptr_ref) {
 ; CHECK-APPLE-LABEL: foo:
-; CHECK-APPLE: mov r0, #16
-; CHECK-APPLE: malloc
-; CHECK-APPLE-DAG: mov [[ID:r[0-9]+]], #1
-; CHECK-APPLE-DAG: mov r8, r{{.*}}
-; CHECK-APPLE-DAG: strb [[ID]], [r{{.*}}, #8]
-
+; CHECK-APPLE:       @ %bb.0: @ %entry
+; CHECK-APPLE-NEXT:    push {lr}
+; CHECK-APPLE-NEXT:    mov r0, #16
+; CHECK-APPLE-NEXT:    mov r1, #0
+; CHECK-APPLE-NEXT:    bl _malloc
+; CHECK-APPLE-NEXT:    mov r8, r0
+; CHECK-APPLE-NEXT:    mov r0, #1
+; CHECK-APPLE-NEXT:    strb r0, [r8, #8]
+; CHECK-APPLE-NEXT:    mov r0, #1065353216
+; CHECK-APPLE-NEXT:    pop {lr}
+; CHECK-APPLE-NEXT:    bx lr
+;
 ; CHECK-O0-LABEL: foo:
-; CHECK-O0: mov r{{.*}}, #16
-; CHECK-O0: malloc
-; CHECK-O0: mov [[ID2:r[0-9]+]], r0
-; CHECK-O0: mov r8, [[ID2]]
-; CHECK-O0: mov [[ID:r[0-9]+]], #1
-; CHECK-O0: strb [[ID]], {{\[}}[[ID2]], #8]
+; CHECK-O0:       @ %bb.0: @ %entry
+; CHECK-O0-NEXT:    push {r7, lr}
+; CHECK-O0-NEXT:    mov r7, sp
+; CHECK-O0-NEXT:    mov r0, #16
+; CHECK-O0-NEXT:    mov r1, #0
+; CHECK-O0-NEXT:    bl _malloc
+; CHECK-O0-NEXT:    mov r1, r0
+; CHECK-O0-NEXT:    mov r8, r1
+; CHECK-O0-NEXT:    mov r0, #1
+; CHECK-O0-NEXT:    strb r0, [r1, #8]
+; CHECK-O0-NEXT:    mov r0, #1065353216
+; CHECK-O0-NEXT:    pop {r7, pc}
+;
+; CHECK-ANDROID-LABEL: foo:
+; CHECK-ANDROID:       @ %bb.0: @ %entry
+; CHECK-ANDROID-NEXT:    .save {r11, lr}
+; CHECK-ANDROID-NEXT:    push {r11, lr}
+; CHECK-ANDROID-NEXT:    mov r0, #16
+; CHECK-ANDROID-NEXT:    mov r1, #0
+; CHECK-ANDROID-NEXT:    bl malloc
+; CHECK-ANDROID-NEXT:    mov r8, r0
+; CHECK-ANDROID-NEXT:    mov r0, #1
+; CHECK-ANDROID-NEXT:    strb r0, [r8, #8]
+; CHECK-ANDROID-NEXT:    mov r0, #1065353216
+; CHECK-ANDROID-NEXT:    pop {r11, pc}
+
 entry:
   %call = call i8* @malloc(i64 16)
   %call.0 = bitcast i8* %call to %swift_error*
@@ -36,30 +63,71 @@ entry:
 ; "caller" calls "foo" that takes a swifterror parameter.
 define float @caller(i8* %error_ref) {
 ; CHECK-APPLE-LABEL: caller:
-; CHECK-APPLE-DAG: mov [[ID:r[0-9]+]], r0
-; CHECK-APPLE-DAG: mov r8, #0
-; CHECK-APPLE: bl {{.*}}foo
-; CHECK-APPLE: mov r0, r8
-; CHECK-APPLE: cmp r8, #0
+; CHECK-APPLE:       @ %bb.0: @ %entry
+; CHECK-APPLE-NEXT:    push {r4, r8, lr}
+; CHECK-APPLE-NEXT:    sub sp, sp, #4
+; CHECK-APPLE-NEXT:    mov r8, #0
+; CHECK-APPLE-NEXT:    mov r4, r0
+; CHECK-APPLE-NEXT:    bl _foo
+; CHECK-APPLE-NEXT:    mov r0, r8
+; CHECK-APPLE-NEXT:    cmp r8, #0
+; CHECK-APPLE-NEXT:    ldrbeq r1, [r0, #8]
+; CHECK-APPLE-NEXT:    strbeq r1, [r4]
+; CHECK-APPLE-NEXT:    bl _free
+; CHECK-APPLE-NEXT:    mov r0, #1065353216
+; CHECK-APPLE-NEXT:    add sp, sp, #4
+; CHECK-APPLE-NEXT:    pop {r4, r8, pc}
+;
+; CHECK-O0-LABEL: caller:
+; CHECK-O0:       @ %bb.0: @ %entry
+; CHECK-O0-NEXT:    push {r7, lr}
+; CHECK-O0-NEXT:    mov r7, sp
+; CHECK-O0-NEXT:    push {r8}
+; CHECK-O0-NEXT:    sub sp, sp, #16
+; CHECK-O0-NEXT:    @ implicit-def: $r1
+; CHECK-O0-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r8, #0
+; CHECK-O0-NEXT:    bl _foo
+; CHECK-O0-NEXT:    str r8, [sp, #4] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r0, r8
+; CHECK-O0-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-O0-NEXT:    movw r0, #0
+; CHECK-O0-NEXT:    cmp r8, r0
+; CHECK-O0-NEXT:    bne LBB1_2
+; CHECK-O0-NEXT:  @ %bb.1: @ %cont
+; CHECK-O0-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldrb r0, [r0, #8]
+; CHECK-O0-NEXT:    strb r0, [r1]
+; CHECK-O0-NEXT:  LBB1_2: @ %handler
+; CHECK-O0-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-O0-NEXT:    bl _free
+; CHECK-O0-NEXT:    mov r0, #1065353216
+; CHECK-O0-NEXT:    sub sp, r7, #4
+; CHECK-O0-NEXT:    pop {r8}
+; CHECK-O0-NEXT:    pop {r7, pc}
+;
+; CHECK-ANDROID-LABEL: caller:
+; CHECK-ANDROID:       @ %bb.0: @ %entry
+; CHECK-ANDROID-NEXT:    .save {r4, r8, r11, lr}
+; CHECK-ANDROID-NEXT:    push {r4, r8, r11, lr}
+; CHECK-ANDROID-NEXT:    .pad #8
+; CHECK-ANDROID-NEXT:    sub sp, sp, #8
+; CHECK-ANDROID-NEXT:    mov r8, #0
+; CHECK-ANDROID-NEXT:    mov r4, r0
+; CHECK-ANDROID-NEXT:    bl foo
+; CHECK-ANDROID-NEXT:    mov r0, r8
+; CHECK-ANDROID-NEXT:    cmp r8, #0
+; CHECK-ANDROID-NEXT:    ldrbeq r1, [r0, #8]
+; CHECK-ANDROID-NEXT:    strbeq r1, [r4]
+; CHECK-ANDROID-NEXT:    bl free
+; CHECK-ANDROID-NEXT:    mov r0, #1065353216
+; CHECK-ANDROID-NEXT:    add sp, sp, #8
+; CHECK-ANDROID-NEXT:    pop {r4, r8, r11, pc}
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r0, #8]
-; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: bl {{.*}}free
 
-; CHECK-O0-LABEL: caller:
 ; spill r0
-; CHECK-O0-DAG: mov r8, #0
-; CHECK-O0-DAG: str r0, [sp[[SLOT:(, #[0-9]+)?]]]
-; CHECK-O0: bl {{.*}}foo
-; CHECK-O0: mov [[TMP:r[0-9]+]], r8
-; CHECK-O0: str [[TMP]], [sp[[SLOT2:(, #[0-9]+)?]]]
-; CHECK-O0: bne
-; CHECK-O0: ldr [[ID:r[0-9]+]], [sp[[SLOT]]]
-; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8]
-; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]]
 ; reload r0
-; CHECK-O0: ldr r0, [sp[[SLOT2]]]
-; CHECK-O0: free
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr_ref
@@ -81,32 +149,111 @@ handler:
 ; "caller2" is the caller of "foo", it calls "foo" inside a loop.
 define float @caller2(i8* %error_ref) {
 ; CHECK-APPLE-LABEL: caller2:
-; CHECK-APPLE-DAG: mov [[ID:r[0-9]+]], r0
-; CHECK-APPLE-DAG: mov r8, #0
-; CHECK-APPLE: bl {{.*}}foo
-; CHECK-APPLE: cmp r8, #0
-; CHECK-APPLE: bne
+; CHECK-APPLE:       @ %bb.0: @ %entry
+; CHECK-APPLE-NEXT:    push {r4, r8, lr}
+; CHECK-APPLE-NEXT:    vpush {d8}
+; CHECK-APPLE-NEXT:    sub sp, sp, #4
+; CHECK-APPLE-NEXT:    vmov.f32 s16, #1.000000e+00
+; CHECK-APPLE-NEXT:    mov r4, r0
+; CHECK-APPLE-NEXT:  LBB2_1: @ %bb_loop
+; CHECK-APPLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-APPLE-NEXT:    mov r8, #0
+; CHECK-APPLE-NEXT:    bl _foo
+; CHECK-APPLE-NEXT:    cmp r8, #0
+; CHECK-APPLE-NEXT:    bne LBB2_4
+; CHECK-APPLE-NEXT:  @ %bb.2: @ %cont
+; CHECK-APPLE-NEXT:    @ in Loop: Header=BB2_1 Depth=1
+; CHECK-APPLE-NEXT:    vmov s0, r0
+; CHECK-APPLE-NEXT:    vcmp.f32 s0, s16
+; CHECK-APPLE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-APPLE-NEXT:    ble LBB2_1
+; CHECK-APPLE-NEXT:  @ %bb.3: @ %bb_end
+; CHECK-APPLE-NEXT:    ldrb r0, [r8, #8]
+; CHECK-APPLE-NEXT:    strb r0, [r4]
+; CHECK-APPLE-NEXT:  LBB2_4: @ %handler
+; CHECK-APPLE-NEXT:    mov r0, r8
+; CHECK-APPLE-NEXT:    bl _free
+; CHECK-APPLE-NEXT:    mov r0, #1065353216
+; CHECK-APPLE-NEXT:    add sp, sp, #4
+; CHECK-APPLE-NEXT:    vpop {d8}
+; CHECK-APPLE-NEXT:    pop {r4, r8, pc}
+;
+; CHECK-O0-LABEL: caller2:
+; CHECK-O0:       @ %bb.0: @ %entry
+; CHECK-O0-NEXT:    push {r7, lr}
+; CHECK-O0-NEXT:    mov r7, sp
+; CHECK-O0-NEXT:    push {r8}
+; CHECK-O0-NEXT:    sub sp, sp, #20
+; CHECK-O0-NEXT:    @ implicit-def: $r1
+; CHECK-O0-NEXT:    str r0, [sp, #12] @ 4-byte Spill
+; CHECK-O0-NEXT:  LBB2_1: @ %bb_loop
+; CHECK-O0-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-O0-NEXT:    mov r8, #0
+; CHECK-O0-NEXT:    bl _foo
+; CHECK-O0-NEXT:    vmov s0, r0
+; CHECK-O0-NEXT:    vstr s0, [sp] @ 4-byte Spill
+; CHECK-O0-NEXT:    str r8, [sp, #4] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r0, r8
+; CHECK-O0-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-O0-NEXT:    movw r0, #0
+; CHECK-O0-NEXT:    cmp r8, r0
+; CHECK-O0-NEXT:    bne LBB2_4
+; CHECK-O0-NEXT:  @ %bb.2: @ %cont
+; CHECK-O0-NEXT:    @ in Loop: Header=BB2_1 Depth=1
+; CHECK-O0-NEXT:    vldr s0, [sp] @ 4-byte Reload
+; CHECK-O0-NEXT:    vmov.f32 s2, #1.000000e+00
+; CHECK-O0-NEXT:    vcmp.f32 s0, s2
+; CHECK-O0-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-O0-NEXT:    ble LBB2_1
+; CHECK-O0-NEXT:  @ %bb.3: @ %bb_end
+; CHECK-O0-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldrb r0, [r0, #8]
+; CHECK-O0-NEXT:    strb r0, [r1]
+; CHECK-O0-NEXT:  LBB2_4: @ %handler
+; CHECK-O0-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-O0-NEXT:    bl _free
+; CHECK-O0-NEXT:    mov r0, #1065353216
+; CHECK-O0-NEXT:    sub sp, r7, #4
+; CHECK-O0-NEXT:    pop {r8}
+; CHECK-O0-NEXT:    pop {r7, pc}
+;
+; CHECK-ANDROID-LABEL: caller2:
+; CHECK-ANDROID:       @ %bb.0: @ %entry
+; CHECK-ANDROID-NEXT:    .save {r4, r8, r11, lr}
+; CHECK-ANDROID-NEXT:    push {r4, r8, r11, lr}
+; CHECK-ANDROID-NEXT:    .vsave {d8}
+; CHECK-ANDROID-NEXT:    vpush {d8}
+; CHECK-ANDROID-NEXT:    .pad #8
+; CHECK-ANDROID-NEXT:    sub sp, sp, #8
+; CHECK-ANDROID-NEXT:    vmov.f32 s16, #1.000000e+00
+; CHECK-ANDROID-NEXT:    mov r4, r0
+; CHECK-ANDROID-NEXT:  .LBB2_1: @ %bb_loop
+; CHECK-ANDROID-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-ANDROID-NEXT:    mov r8, #0
+; CHECK-ANDROID-NEXT:    bl foo
+; CHECK-ANDROID-NEXT:    cmp r8, #0
+; CHECK-ANDROID-NEXT:    bne .LBB2_4
+; CHECK-ANDROID-NEXT:  @ %bb.2: @ %cont
+; CHECK-ANDROID-NEXT:    @ in Loop: Header=BB2_1 Depth=1
+; CHECK-ANDROID-NEXT:    vmov s0, r0
+; CHECK-ANDROID-NEXT:    vcmp.f32 s0, s16
+; CHECK-ANDROID-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-ANDROID-NEXT:    ble .LBB2_1
+; CHECK-ANDROID-NEXT:  @ %bb.3: @ %bb_end
+; CHECK-ANDROID-NEXT:    ldrb r0, [r8, #8]
+; CHECK-ANDROID-NEXT:    strb r0, [r4]
+; CHECK-ANDROID-NEXT:  .LBB2_4: @ %handler
+; CHECK-ANDROID-NEXT:    mov r0, r8
+; CHECK-ANDROID-NEXT:    bl free
+; CHECK-ANDROID-NEXT:    mov r0, #1065353216
+; CHECK-ANDROID-NEXT:    add sp, sp, #8
+; CHECK-ANDROID-NEXT:    vpop {d8}
+; CHECK-ANDROID-NEXT:    pop {r4, r8, r11, pc}
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:r[0-9]+]], [r8, #8]
-; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: mov r0, r8
-; CHECK-APPLE: bl {{.*}}free
 
-; CHECK-O0-LABEL: caller2:
 ; spill r0
-; CHECK-O0-DAG: str r0,
-; CHECK-O0-DAG: mov r8, #0
-; CHECK-O0: bl {{.*}}foo
-; CHECK-O0: mov r{{.*}}, r8
-; CHECK-O0: str r0, [sp{{(, #[0-9]+)?}}]
-; CHECK-O0: bne
-; CHECK-O0: ble
 ; reload r0
-; CHECK-O0: ldr [[ID:r[0-9]+]],
-; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8]
-; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-O0: ldr r0, [sp{{(, #[0-9]+)?}}]
-; CHECK-O0: free
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   br label %bb_loop
@@ -134,26 +281,84 @@ handler:
 ; under a certain condition.
 define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
 ; CHECK-APPLE-LABEL: foo_if:
-; CHECK-APPLE: cmp r0, #0
-; CHECK-APPLE: eq
-; CHECK-APPLE: mov r0, #16
-; CHECK-APPLE: malloc
-; CHECK-APPLE-DAG: mov [[ID:r[0-9]+]], #1
-; CHECK-APPLE-DAG: mov r8, r{{.*}}
-; CHECK-APPLE-DAG: strb [[ID]], [r{{.*}}, #8]
-
+; CHECK-APPLE:       @ %bb.0: @ %entry
+; CHECK-APPLE-NEXT:    push {lr}
+; CHECK-APPLE-NEXT:    cmp r0, #0
+; CHECK-APPLE-NEXT:    beq LBB3_2
+; CHECK-APPLE-NEXT:  @ %bb.1: @ %gen_error
+; CHECK-APPLE-NEXT:    mov r0, #16
+; CHECK-APPLE-NEXT:    mov r1, #0
+; CHECK-APPLE-NEXT:    bl _malloc
+; CHECK-APPLE-NEXT:    mov r8, r0
+; CHECK-APPLE-NEXT:    mov r0, #1
+; CHECK-APPLE-NEXT:    vmov.f32 s0, #1.000000e+00
+; CHECK-APPLE-NEXT:    strb r0, [r8, #8]
+; CHECK-APPLE-NEXT:    b LBB3_3
+; CHECK-APPLE-NEXT:  LBB3_2:
+; CHECK-APPLE-NEXT:    vldr s0, LCPI3_0
+; CHECK-APPLE-NEXT:  LBB3_3: @ %common.ret
+; CHECK-APPLE-NEXT:    vmov r0, s0
+; CHECK-APPLE-NEXT:    pop {lr}
+; CHECK-APPLE-NEXT:    bx lr
+; CHECK-APPLE-NEXT:    .p2align 2
+; CHECK-APPLE-NEXT:  @ %bb.4:
+; CHECK-APPLE-NEXT:    .data_region
+; CHECK-APPLE-NEXT:  LCPI3_0:
+; CHECK-APPLE-NEXT:    .long 0x00000000 @ float 0
+; CHECK-APPLE-NEXT:    .end_data_region
+;
 ; CHECK-O0-LABEL: foo_if:
+; CHECK-O0:       @ %bb.0: @ %entry
+; CHECK-O0-NEXT:    push {r7, lr}
+; CHECK-O0-NEXT:    mov r7, sp
+; CHECK-O0-NEXT:    sub sp, sp, #4
+; CHECK-O0-NEXT:    str r8, [sp] @ 4-byte Spill
+; CHECK-O0-NEXT:    cmp r0, #0
+; CHECK-O0-NEXT:    beq LBB3_2
+; CHECK-O0-NEXT:  @ %bb.1: @ %gen_error
+; CHECK-O0-NEXT:    mov r0, #16
+; CHECK-O0-NEXT:    mov r1, #0
+; CHECK-O0-NEXT:    bl _malloc
+; CHECK-O0-NEXT:    mov r1, r0
+; CHECK-O0-NEXT:    mov r8, r1
+; CHECK-O0-NEXT:    mov r0, #1
+; CHECK-O0-NEXT:    strb r0, [r1, #8]
+; CHECK-O0-NEXT:    mov r0, #1065353216
+; CHECK-O0-NEXT:    mov sp, r7
+; CHECK-O0-NEXT:    pop {r7, pc}
+; CHECK-O0-NEXT:  LBB3_2: @ %normal
+; CHECK-O0-NEXT:    ldr r8, [sp] @ 4-byte Reload
+; CHECK-O0-NEXT:    mov r0, #0
+; CHECK-O0-NEXT:    mov sp, r7
+; CHECK-O0-NEXT:    pop {r7, pc}
+;
+; CHECK-ANDROID-LABEL: foo_if:
+; CHECK-ANDROID:       @ %bb.0: @ %entry
+; CHECK-ANDROID-NEXT:    .save {r11, lr}
+; CHECK-ANDROID-NEXT:    push {r11, lr}
+; CHECK-ANDROID-NEXT:    cmp r0, #0
+; CHECK-ANDROID-NEXT:    beq .LBB3_2
+; CHECK-ANDROID-NEXT:  @ %bb.1: @ %gen_error
+; CHECK-ANDROID-NEXT:    mov r0, #16
+; CHECK-ANDROID-NEXT:    mov r1, #0
+; CHECK-ANDROID-NEXT:    bl malloc
+; CHECK-ANDROID-NEXT:    vmov.f32 s0, #1.000000e+00
+; CHECK-ANDROID-NEXT:    mov r8, r0
+; CHECK-ANDROID-NEXT:    mov r0, #1
+; CHECK-ANDROID-NEXT:    strb r0, [r8, #8]
+; CHECK-ANDROID-NEXT:    vmov r0, s0
+; CHECK-ANDROID-NEXT:    pop {r11, pc}
+; CHECK-ANDROID-NEXT:  .LBB3_2:
+; CHECK-ANDROID-NEXT:    vldr s0, .LCPI3_0
+; CHECK-ANDROID-NEXT:    vmov r0, s0
+; CHECK-ANDROID-NEXT:    pop {r11, pc}
+; CHECK-ANDROID-NEXT:    .p2align 2
+; CHECK-ANDROID-NEXT:  @ %bb.3:
+; CHECK-ANDROID-NEXT:  .LCPI3_0:
+; CHECK-ANDROID-NEXT:    .long 0x00000000 @ float 0
+
 ; spill to stack
-; CHECK-O0: str r8
-; CHECK-O0: cmp r0, #0
-; CHECK-O0: beq
-; CHECK-O0: mov r0, #16
-; CHECK-O0: malloc
-; CHECK-O0: mov [[ID:r[0-9]+]], r0
-; CHECK-O0: mov [[ID2:[a-z0-9]+]], #1
-; CHECK-O0: strb [[ID2]], {{\[}}[[ID]], #8]
 ; reload from stack
-; CHECK-O0: ldr r8
 entry:
   %cond = icmp ne i32 %cc, 0
   br i1 %cond, label %gen_error, label %normal
@@ -174,29 +379,114 @@ normal:
 ; under a certain condition inside a loop.
 define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float %cc2) {
 ; CHECK-APPLE-LABEL: foo_loop:
-; CHECK-APPLE: mov [[CODE:r[0-9]+]], r0
+; CHECK-APPLE:       @ %bb.0: @ %entry
+; CHECK-APPLE-NEXT:    push {r4, r5, lr}
+; CHECK-APPLE-NEXT:    vpush {d8, d9}
+; CHECK-APPLE-NEXT:    vmov.f32 s18, #1.000000e+00
+; CHECK-APPLE-NEXT:    mov r4, r0
+; CHECK-APPLE-NEXT:    vmov s16, r1
+; CHECK-APPLE-NEXT:    mov r5, #1
+; CHECK-APPLE-NEXT:    b LBB4_2
+; CHECK-APPLE-NEXT:  LBB4_1: @ %bb_cont
+; CHECK-APPLE-NEXT:    @ in Loop: Header=BB4_2 Depth=1
+; CHECK-APPLE-NEXT:    vcmp.f32 s16, s18
+; CHECK-APPLE-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-APPLE-NEXT:    bgt LBB4_4
+; CHECK-APPLE-NEXT:  LBB4_2: @ %bb_loop
+; CHECK-APPLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-APPLE-NEXT:    cmp r4, #0
+; CHECK-APPLE-NEXT:    beq LBB4_1
+; CHECK-APPLE-NEXT:  @ %bb.3: @ %gen_error
+; CHECK-APPLE-NEXT:    @ in Loop: Header=BB4_2 Depth=1
+; CHECK-APPLE-NEXT:    mov r0, #16
+; CHECK-APPLE-NEXT:    mov r1, #0
+; CHECK-APPLE-NEXT:    bl _malloc
+; CHECK-APPLE-NEXT:    mov r8, r0
+; CHECK-APPLE-NEXT:    strb r5, [r0, #8]
+; CHECK-APPLE-NEXT:    b LBB4_1
+; CHECK-APPLE-NEXT:  LBB4_4: @ %bb_end
+; CHECK-APPLE-NEXT:    mov r0, #0
+; CHECK-APPLE-NEXT:    vpop {d8, d9}
+; CHECK-APPLE-NEXT:    pop {r4, r5, pc}
+;
+; CHECK-O0-LABEL: foo_loop:
+; CHECK-O0:       @ %bb.0: @ %entry
+; CHECK-O0-NEXT:    push {r7, lr}
+; CHECK-O0-NEXT:    mov r7, sp
+; CHECK-O0-NEXT:    sub sp, sp, #20
+; CHECK-O0-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-O0-NEXT:    vmov s0, r1
+; CHECK-O0-NEXT:    vstr s0, [r7, #-8] @ 4-byte Spill
+; CHECK-O0-NEXT:    str r8, [r7, #-4] @ 4-byte Spill
+; CHECK-O0-NEXT:    b LBB4_1
+; CHECK-O0-NEXT:  LBB4_1: @ %bb_loop
+; CHECK-O0-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-O0-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r0, [r7, #-4] @ 4-byte Reload
+; CHECK-O0-NEXT:    cmp r1, #0
+; CHECK-O0-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-O0-NEXT:    beq LBB4_3
+; CHECK-O0-NEXT:  @ %bb.2: @ %gen_error
+; CHECK-O0-NEXT:    @ in Loop: Header=BB4_1 Depth=1
+; CHECK-O0-NEXT:    mov r0, #16
+; CHECK-O0-NEXT:    mov r1, #0
+; CHECK-O0-NEXT:    bl _malloc
+; CHECK-O0-NEXT:    mov r2, r0
+; CHECK-O0-NEXT:    movw r1, #1
+; CHECK-O0-NEXT:    strb r1, [r2, #8]
+; CHECK-O0-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-O0-NEXT:  LBB4_3: @ %bb_cont
+; CHECK-O0-NEXT:    @ in Loop: Header=BB4_1 Depth=1
+; CHECK-O0-NEXT:    vldr s0, [r7, #-8] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-O0-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-O0-NEXT:    vmov.f32 s2, #1.000000e+00
+; CHECK-O0-NEXT:    vcmp.f32 s0, s2
+; CHECK-O0-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-O0-NEXT:    str r0, [r7, #-4] @ 4-byte Spill
+; CHECK-O0-NEXT:    ble LBB4_1
+; CHECK-O0-NEXT:  @ %bb.4: @ %bb_end
+; CHECK-O0-NEXT:    ldr r8, [sp] @ 4-byte Reload
+; CHECK-O0-NEXT:    mov r0, #0
+; CHECK-O0-NEXT:    mov sp, r7
+; CHECK-O0-NEXT:    pop {r7, pc}
+;
+; CHECK-ANDROID-LABEL: foo_loop:
+; CHECK-ANDROID:       @ %bb.0: @ %entry
+; CHECK-ANDROID-NEXT:    .save {r4, r5, r11, lr}
+; CHECK-ANDROID-NEXT:    push {r4, r5, r11, lr}
+; CHECK-ANDROID-NEXT:    .vsave {d8, d9}
+; CHECK-ANDROID-NEXT:    vpush {d8, d9}
+; CHECK-ANDROID-NEXT:    vmov.f32 s18, #1.000000e+00
+; CHECK-ANDROID-NEXT:    mov r4, r0
+; CHECK-ANDROID-NEXT:    vmov s16, r1
+; CHECK-ANDROID-NEXT:    mov r5, #1
+; CHECK-ANDROID-NEXT:    b .LBB4_2
+; CHECK-ANDROID-NEXT:  .LBB4_1: @ %bb_cont
+; CHECK-ANDROID-NEXT:    @ in Loop: Header=BB4_2 Depth=1
+; CHECK-ANDROID-NEXT:    vcmp.f32 s16, s18
+; CHECK-ANDROID-NEXT:    vmrs APSR_nzcv, fpscr
+; CHECK-ANDROID-NEXT:    bgt .LBB4_4
+; CHECK-ANDROID-NEXT:  .LBB4_2: @ %bb_loop
+; CHECK-ANDROID-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-ANDROID-NEXT:    cmp r4, #0
+; CHECK-ANDROID-NEXT:    beq .LBB4_1
+; CHECK-ANDROID-NEXT:  @ %bb.3: @ %gen_error
+; CHECK-ANDROID-NEXT:    @ in Loop: Header=BB4_2 Depth=1
+; CHECK-ANDROID-NEXT:    mov r0, #16
+; CHECK-ANDROID-NEXT:    mov r1, #0
+; CHECK-ANDROID-NEXT:    bl malloc
+; CHECK-ANDROID-NEXT:    mov r8, r0
+; CHECK-ANDROID-NEXT:    strb r5, [r0, #8]
+; CHECK-ANDROID-NEXT:    b .LBB4_1
+; CHECK-ANDROID-NEXT:  .LBB4_4: @ %bb_end
+; CHECK-ANDROID-NEXT:    mov r0, #0
+; CHECK-ANDROID-NEXT:    vpop {d8, d9}
+; CHECK-ANDROID-NEXT:    pop {r4, r5, r11, pc}
 ; swifterror is kept in a register
-; CHECK-APPLE: cmp [[CODE]], #0
-; CHECK-APPLE: beq
-; CHECK-APPLE: mov r0, #16
-; CHECK-APPLE: malloc
-; CHECK-APPLE: strb r{{.*}}, [r0, #8]
-; CHECK-APPLE: b
 
-; CHECK-O0-LABEL: foo_loop:
-; CHECK-O0: cmp r{{.*}}, #0
-; CHECK-O0: beq
-; CHECK-O0: mov r0, #16
-; CHECK-O0: malloc
-; CHECK-O0-DAG: mov [[ID:r[0-9]+]], r0
-; CHECK-O0-DAG: movw [[ID2:.*]], #1
-; CHECK-O0: strb [[ID2]], [{{.*}}[[ID]], #8]
 ; spill r0
-; CHECK-O0: str r0, [sp{{.*}}]
-; CHECK-O0: vcmp
-; CHECK-O0: ble
 ; reload from stack
-; CHECK-O0: ldr r8
 entry:
   br label %bb_loop
 
@@ -223,27 +513,56 @@ bb_end:
 ; parameter.
 define void @foo_sret(%struct.S* sret(%struct.S) %agg.result, i32 %val1, %swift_error** swifterror %error_ptr_ref) {
 ; CHECK-APPLE-LABEL: foo_sret:
-; CHECK-APPLE: mov [[SRET:r[0-9]+]], r0
-; CHECK-APPLE: mov r0, #16
-; CHECK-APPLE: malloc
-; CHECK-APPLE: mov [[REG:r[0-9]+]], #1
-; CHECK-APPLE-DAG: mov r8, r0
-; CHECK-APPLE-DAG: strb [[REG]], [r0, #8]
-; CHECK-APPLE-DAG: str r{{.*}}, [{{.*}}[[SRET]], #4]
-
+; CHECK-APPLE:       @ %bb.0: @ %entry
+; CHECK-APPLE-NEXT:    push {r4, r5, lr}
+; CHECK-APPLE-NEXT:    mov r4, r1
+; CHECK-APPLE-NEXT:    mov r5, r0
+; CHECK-APPLE-NEXT:    mov r0, #16
+; CHECK-APPLE-NEXT:    mov r1, #0
+; CHECK-APPLE-NEXT:    bl _malloc
+; CHECK-APPLE-NEXT:    mov r1, #1
+; CHECK-APPLE-NEXT:    mov r8, r0
+; CHECK-APPLE-NEXT:    strb r1, [r0, #8]
+; CHECK-APPLE-NEXT:    str r4, [r5, #4]
+; CHECK-APPLE-NEXT:    pop {r4, r5, pc}
+;
 ; CHECK-O0-LABEL: foo_sret:
-; CHECK-O0-DAG: mov r{{.*}}, #16
+; CHECK-O0:       @ %bb.0: @ %entry
+; CHECK-O0-NEXT:    push {r7, lr}
+; CHECK-O0-NEXT:    mov r7, sp
+; CHECK-O0-NEXT:    sub sp, sp, #8
+; CHECK-O0-NEXT:    str r1, [sp] @ 4-byte Spill
+; CHECK-O0-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r0, #16
+; CHECK-O0-NEXT:    mov r1, #0
+; CHECK-O0-NEXT:    bl _malloc
+; CHECK-O0-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-O0-NEXT:    mov r3, r0
+; CHECK-O0-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-O0-NEXT:    mov r8, r3
+; CHECK-O0-NEXT:    mov r2, #1
+; CHECK-O0-NEXT:    strb r2, [r3, #8]
+; CHECK-O0-NEXT:    str r1, [r0, #4]
+; CHECK-O0-NEXT:    mov sp, r7
+; CHECK-O0-NEXT:    pop {r7, pc}
+;
+; CHECK-ANDROID-LABEL: foo_sret:
+; CHECK-ANDROID:       @ %bb.0: @ %entry
+; CHECK-ANDROID-NEXT:    .save {r4, r5, r11, lr}
+; CHECK-ANDROID-NEXT:    push {r4, r5, r11, lr}
+; CHECK-ANDROID-NEXT:    mov r4, r1
+; CHECK-ANDROID-NEXT:    mov r5, r0
+; CHECK-ANDROID-NEXT:    mov r0, #16
+; CHECK-ANDROID-NEXT:    mov r1, #0
+; CHECK-ANDROID-NEXT:    bl malloc
+; CHECK-ANDROID-NEXT:    mov r1, #1
+; CHECK-ANDROID-NEXT:    mov r8, r0
+; CHECK-ANDROID-NEXT:    strb r1, [r0, #8]
+; CHECK-ANDROID-NEXT:    str r4, [r5, #4]
+; CHECK-ANDROID-NEXT:    pop {r4, r5, r11, pc}
+
 ; spill to stack: sret and val1
-; CHECK-O0-DAG: str r0
-; CHECK-O0-DAG: str r1
-; CHECK-O0: malloc
 ; reload from stack: sret and val1
-; CHECK-O0: ldr
-; CHECK-O0: ldr
-; CHECK-O0-DAG: mov r8
-; CHECK-O0-DAG: mov [[ID:r[0-9]+]], #1
-; CHECK-O0-DAG: strb [[ID]], [{{r[0-9]+}}, #8]
-; CHECK-O0-DAG: str r{{.*}}, [{{.*}}, #4]
 entry:
   %call = call i8* @malloc(i64 16)
   %call.0 = bitcast i8* %call to %swift_error*
@@ -258,30 +577,79 @@ entry:
 ; "caller3" calls "foo_sret" that takes a swifterror parameter.
 define float @caller3(i8* %error_ref) {
 ; CHECK-APPLE-LABEL: caller3:
-; CHECK-APPLE: mov [[ID:r[0-9]+]], r0
-; CHECK-APPLE: mov r8, #0
-; CHECK-APPLE: bl {{.*}}foo_sret
-; CHECK-APPLE: mov r0, r8
-; CHECK-APPLE: cmp r8, #0
+; CHECK-APPLE:       @ %bb.0: @ %entry
+; CHECK-APPLE-NEXT:    push {r4, r7, r8, lr}
+; CHECK-APPLE-NEXT:    add r7, sp, #8
+; CHECK-APPLE-NEXT:    sub sp, sp, #32
+; CHECK-APPLE-NEXT:    bfc sp, #0, #3
+; CHECK-APPLE-NEXT:    mov r4, r0
+; CHECK-APPLE-NEXT:    add r0, sp, #8
+; CHECK-APPLE-NEXT:    mov r1, #1
+; CHECK-APPLE-NEXT:    mov r8, #0
+; CHECK-APPLE-NEXT:    bl _foo_sret
+; CHECK-APPLE-NEXT:    mov r0, r8
+; CHECK-APPLE-NEXT:    cmp r8, #0
+; CHECK-APPLE-NEXT:    ldrbeq r1, [r0, #8]
+; CHECK-APPLE-NEXT:    strbeq r1, [r4]
+; CHECK-APPLE-NEXT:    bl _free
+; CHECK-APPLE-NEXT:    mov r0, #1065353216
+; CHECK-APPLE-NEXT:    sub sp, r7, #8
+; CHECK-APPLE-NEXT:    pop {r4, r7, r8, pc}
+;
+; CHECK-O0-LABEL: caller3:
+; CHECK-O0:       @ %bb.0: @ %entry
+; CHECK-O0-NEXT:    push {r7, lr}
+; CHECK-O0-NEXT:    mov r7, sp
+; CHECK-O0-NEXT:    push {r8}
+; CHECK-O0-NEXT:    sub sp, sp, #44
+; CHECK-O0-NEXT:    bfc sp, #0, #3
+; CHECK-O0-NEXT:    @ implicit-def: $r1
+; CHECK-O0-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r8, #0
+; CHECK-O0-NEXT:    add r0, sp, #16
+; CHECK-O0-NEXT:    mov r1, #1
+; CHECK-O0-NEXT:    bl _foo_sret
+; CHECK-O0-NEXT:    str r8, [sp, #4] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r0, r8
+; CHECK-O0-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-O0-NEXT:    movw r0, #0
+; CHECK-O0-NEXT:    cmp r8, r0
+; CHECK-O0-NEXT:    bne LBB6_2
+; CHECK-O0-NEXT:  @ %bb.1: @ %cont
+; CHECK-O0-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldrb r0, [r0, #8]
+; CHECK-O0-NEXT:    strb r0, [r1]
+; CHECK-O0-NEXT:  LBB6_2: @ %handler
+; CHECK-O0-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-O0-NEXT:    bl _free
+; CHECK-O0-NEXT:    mov r0, #1065353216
+; CHECK-O0-NEXT:    sub sp, r7, #4
+; CHECK-O0-NEXT:    pop {r8}
+; CHECK-O0-NEXT:    pop {r7, pc}
+;
+; CHECK-ANDROID-LABEL: caller3:
+; CHECK-ANDROID:       @ %bb.0: @ %entry
+; CHECK-ANDROID-NEXT:    .save {r4, r8, r11, lr}
+; CHECK-ANDROID-NEXT:    push {r4, r8, r11, lr}
+; CHECK-ANDROID-NEXT:    .pad #32
+; CHECK-ANDROID-NEXT:    sub sp, sp, #32
+; CHECK-ANDROID-NEXT:    mov r4, r0
+; CHECK-ANDROID-NEXT:    add r0, sp, #8
+; CHECK-ANDROID-NEXT:    mov r1, #1
+; CHECK-ANDROID-NEXT:    mov r8, #0
+; CHECK-ANDROID-NEXT:    bl foo_sret
+; CHECK-ANDROID-NEXT:    mov r0, r8
+; CHECK-ANDROID-NEXT:    cmp r8, #0
+; CHECK-ANDROID-NEXT:    ldrbeq r1, [r0, #8]
+; CHECK-ANDROID-NEXT:    strbeq r1, [r4]
+; CHECK-ANDROID-NEXT:    bl free
+; CHECK-ANDROID-NEXT:    mov r0, #1065353216
+; CHECK-ANDROID-NEXT:    add sp, sp, #32
+; CHECK-ANDROID-NEXT:    pop {r4, r8, r11, pc}
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r0, #8]
-; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: bl {{.*}}free
 
-; CHECK-O0-LABEL: caller3:
-; CHECK-O0-DAG: mov r8, #0
-; CHECK-O0-DAG: mov r1
-; CHECK-O0: bl {{.*}}foo_sret
-; CHECK-O0: mov [[ID2:r[0-9]+]], r8
-; CHECK-O0: str [[ID2]], [sp[[SLOT:.*]]]
-; CHECK-O0: cmp r8
-; CHECK-O0: bne
 ; Access part of the error object and save it to error_ref
-; CHECK-O0: ldr [[ID:r[0-9]+]]
-; CHECK-O0: ldrb [[CODE:r[0-9]+]]
-; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-O0: ldr r0, [sp[[SLOT]]
-; CHECK-O0: bl {{.*}}free
 entry:
   %s = alloca %struct.S, align 8
   %error_ptr_ref = alloca swifterror %swift_error*
@@ -306,11 +674,104 @@ handler:
 declare void @llvm.va_start(i8*) nounwind
 define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
 ; CHECK-APPLE-LABEL: foo_vararg:
-; CHECK-APPLE: mov r0, #16
-; CHECK-APPLE: malloc
-; CHECK-APPLE: mov r8, r0
-; CHECK-APPLE: mov [[ID:r[0-9]+]], #1
-; CHECK-APPLE-DAG: strb [[ID]], [r8, #8]
+; CHECK-APPLE:       @ %bb.0: @ %entry
+; CHECK-APPLE-NEXT:    sub sp, sp, #16
+; CHECK-APPLE-NEXT:    push {r7, lr}
+; CHECK-APPLE-NEXT:    mov r7, sp
+; CHECK-APPLE-NEXT:    sub sp, sp, #24
+; CHECK-APPLE-NEXT:    bfc sp, #0, #3
+; CHECK-APPLE-NEXT:    add r8, r7, #8
+; CHECK-APPLE-NEXT:    stm r8, {r0, r1, r2, r3}
+; CHECK-APPLE-NEXT:    mov r0, #16
+; CHECK-APPLE-NEXT:    mov r1, #0
+; CHECK-APPLE-NEXT:    bl _malloc
+; CHECK-APPLE-NEXT:    mov r8, r0
+; CHECK-APPLE-NEXT:    mov r0, #1
+; CHECK-APPLE-NEXT:    add r3, r7, #8
+; CHECK-APPLE-NEXT:    strb r0, [r8, #8]
+; CHECK-APPLE-NEXT:    add r0, r7, #8
+; CHECK-APPLE-NEXT:    ldm r3, {r1, r2, r3}
+; CHECK-APPLE-NEXT:    add r0, r0, #12
+; CHECK-APPLE-NEXT:    str r0, [sp, #16]
+; CHECK-APPLE-NEXT:    mov r0, #1065353216
+; CHECK-APPLE-NEXT:    str r1, [sp, #12]
+; CHECK-APPLE-NEXT:    str r2, [sp, #8]
+; CHECK-APPLE-NEXT:    str r3, [sp, #4]
+; CHECK-APPLE-NEXT:    mov sp, r7
+; CHECK-APPLE-NEXT:    pop {r7, lr}
+; CHECK-APPLE-NEXT:    add sp, sp, #16
+; CHECK-APPLE-NEXT:    bx lr
+;
+; CHECK-O0-LABEL: foo_vararg:
+; CHECK-O0:       @ %bb.0: @ %entry
+; CHECK-O0-NEXT:    sub sp, sp, #16
+; CHECK-O0-NEXT:    push {r7, lr}
+; CHECK-O0-NEXT:    mov r7, sp
+; CHECK-O0-NEXT:    sub sp, sp, #24
+; CHECK-O0-NEXT:    bfc sp, #0, #3
+; CHECK-O0-NEXT:    str r3, [r7, #20]
+; CHECK-O0-NEXT:    str r2, [r7, #16]
+; CHECK-O0-NEXT:    str r1, [r7, #12]
+; CHECK-O0-NEXT:    str r0, [r7, #8]
+; CHECK-O0-NEXT:    mov r0, #16
+; CHECK-O0-NEXT:    mov r1, #0
+; CHECK-O0-NEXT:    bl _malloc
+; CHECK-O0-NEXT:    mov r1, r0
+; CHECK-O0-NEXT:    mov r8, r1
+; CHECK-O0-NEXT:    mov r0, #1
+; CHECK-O0-NEXT:    strb r0, [r1, #8]
+; CHECK-O0-NEXT:    add r0, r7, #8
+; CHECK-O0-NEXT:    str r0, [sp, #16]
+; CHECK-O0-NEXT:    ldr r0, [sp, #16]
+; CHECK-O0-NEXT:    add r1, r0, #4
+; CHECK-O0-NEXT:    str r1, [sp, #16]
+; CHECK-O0-NEXT:    ldr r0, [r0]
+; CHECK-O0-NEXT:    str r0, [sp, #12]
+; CHECK-O0-NEXT:    ldr r0, [sp, #16]
+; CHECK-O0-NEXT:    add r1, r0, #4
+; CHECK-O0-NEXT:    str r1, [sp, #16]
+; CHECK-O0-NEXT:    ldr r0, [r0]
+; CHECK-O0-NEXT:    str r0, [sp, #8]
+; CHECK-O0-NEXT:    ldr r0, [sp, #16]
+; CHECK-O0-NEXT:    add r1, r0, #4
+; CHECK-O0-NEXT:    str r1, [sp, #16]
+; CHECK-O0-NEXT:    ldr r0, [r0]
+; CHECK-O0-NEXT:    str r0, [sp, #4]
+; CHECK-O0-NEXT:    mov r0, #1065353216
+; CHECK-O0-NEXT:    mov sp, r7
+; CHECK-O0-NEXT:    pop {r7, lr}
+; CHECK-O0-NEXT:    add sp, sp, #16
+; CHECK-O0-NEXT:    bx lr
+;
+; CHECK-ANDROID-LABEL: foo_vararg:
+; CHECK-ANDROID:       @ %bb.0: @ %entry
+; CHECK-ANDROID-NEXT:    .pad #16
+; CHECK-ANDROID-NEXT:    sub sp, sp, #16
+; CHECK-ANDROID-NEXT:    .save {r11, lr}
+; CHECK-ANDROID-NEXT:    push {r11, lr}
+; CHECK-ANDROID-NEXT:    .pad #24
+; CHECK-ANDROID-NEXT:    sub sp, sp, #24
+; CHECK-ANDROID-NEXT:    add r8, sp, #32
+; CHECK-ANDROID-NEXT:    stm r8, {r0, r1, r2, r3}
+; CHECK-ANDROID-NEXT:    mov r0, #16
+; CHECK-ANDROID-NEXT:    mov r1, #0
+; CHECK-ANDROID-NEXT:    bl malloc
+; CHECK-ANDROID-NEXT:    mov r8, r0
+; CHECK-ANDROID-NEXT:    mov r0, #1
+; CHECK-ANDROID-NEXT:    add r3, sp, #32
+; CHECK-ANDROID-NEXT:    strb r0, [r8, #8]
+; CHECK-ANDROID-NEXT:    add r0, sp, #32
+; CHECK-ANDROID-NEXT:    ldm r3, {r1, r2, r3}
+; CHECK-ANDROID-NEXT:    add r0, r0, #12
+; CHECK-ANDROID-NEXT:    str r0, [sp, #16]
+; CHECK-ANDROID-NEXT:    mov r0, #1065353216
+; CHECK-ANDROID-NEXT:    str r1, [sp, #12]
+; CHECK-ANDROID-NEXT:    str r2, [sp, #8]
+; CHECK-ANDROID-NEXT:    str r3, [sp, #4]
+; CHECK-ANDROID-NEXT:    add sp, sp, #24
+; CHECK-ANDROID-NEXT:    pop {r11, lr}
+; CHECK-ANDROID-NEXT:    add sp, sp, #16
+; CHECK-ANDROID-NEXT:    bx lr
 
 entry:
   %call = call i8* @malloc(i64 16)
@@ -338,15 +799,95 @@ entry:
 ; "caller4" calls "foo_vararg" that takes a swifterror parameter.
 define float @caller4(i8* %error_ref) {
 ; CHECK-APPLE-LABEL: caller4:
-; CHECK-APPLE: mov [[ID:r[0-9]+]], r0
-; CHECK-APPLE: mov r8, #0
-; CHECK-APPLE: bl {{.*}}foo_vararg
-; CHECK-APPLE: mov r0, r8
-; CHECK-APPLE: cmp r8, #0
+; CHECK-APPLE:       @ %bb.0: @ %entry
+; CHECK-APPLE-NEXT:    push {r4, r8, lr}
+; CHECK-APPLE-NEXT:    sub sp, sp, #16
+; CHECK-APPLE-NEXT:    mov r4, r0
+; CHECK-APPLE-NEXT:    mov r0, #11
+; CHECK-APPLE-NEXT:    str r0, [sp, #4]
+; CHECK-APPLE-NEXT:    mov r0, #10
+; CHECK-APPLE-NEXT:    str r0, [sp, #8]
+; CHECK-APPLE-NEXT:    mov r0, #12
+; CHECK-APPLE-NEXT:    str r0, [sp]
+; CHECK-APPLE-NEXT:    mov r8, #0
+; CHECK-APPLE-NEXT:    mov r0, #10
+; CHECK-APPLE-NEXT:    mov r1, #11
+; CHECK-APPLE-NEXT:    mov r2, #12
+; CHECK-APPLE-NEXT:    bl _foo_vararg
+; CHECK-APPLE-NEXT:    mov r0, r8
+; CHECK-APPLE-NEXT:    cmp r8, #0
+; CHECK-APPLE-NEXT:    ldrbeq r1, [r0, #8]
+; CHECK-APPLE-NEXT:    strbeq r1, [r4]
+; CHECK-APPLE-NEXT:    bl _free
+; CHECK-APPLE-NEXT:    mov r0, #1065353216
+; CHECK-APPLE-NEXT:    add sp, sp, #16
+; CHECK-APPLE-NEXT:    pop {r4, r8, pc}
+;
+; CHECK-O0-LABEL: caller4:
+; CHECK-O0:       @ %bb.0: @ %entry
+; CHECK-O0-NEXT:    push {r7, lr}
+; CHECK-O0-NEXT:    mov r7, sp
+; CHECK-O0-NEXT:    push {r8}
+; CHECK-O0-NEXT:    sub sp, sp, #28
+; CHECK-O0-NEXT:    @ implicit-def: $r1
+; CHECK-O0-NEXT:    str r0, [sp] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r8, #0
+; CHECK-O0-NEXT:    mov r0, #10
+; CHECK-O0-NEXT:    str r0, [r7, #-12]
+; CHECK-O0-NEXT:    mov r0, #11
+; CHECK-O0-NEXT:    str r0, [sp, #16]
+; CHECK-O0-NEXT:    mov r0, #12
+; CHECK-O0-NEXT:    str r0, [sp, #12]
+; CHECK-O0-NEXT:    ldr r0, [r7, #-12]
+; CHECK-O0-NEXT:    ldr r1, [sp, #16]
+; CHECK-O0-NEXT:    ldr r2, [sp, #12]
+; CHECK-O0-NEXT:    bl _foo_vararg
+; CHECK-O0-NEXT:    str r8, [sp, #4] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r0, r8
+; CHECK-O0-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-O0-NEXT:    movw r0, #0
+; CHECK-O0-NEXT:    cmp r8, r0
+; CHECK-O0-NEXT:    bne LBB8_2
+; CHECK-O0-NEXT:  @ %bb.1: @ %cont
+; CHECK-O0-NEXT:    ldr r1, [sp] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldrb r0, [r0, #8]
+; CHECK-O0-NEXT:    strb r0, [r1]
+; CHECK-O0-NEXT:  LBB8_2: @ %handler
+; CHECK-O0-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-O0-NEXT:    bl _free
+; CHECK-O0-NEXT:    mov r0, #1065353216
+; CHECK-O0-NEXT:    sub sp, r7, #4
+; CHECK-O0-NEXT:    pop {r8}
+; CHECK-O0-NEXT:    pop {r7, pc}
+;
+; CHECK-ANDROID-LABEL: caller4:
+; CHECK-ANDROID:       @ %bb.0: @ %entry
+; CHECK-ANDROID-NEXT:    .save {r4, r8, r11, lr}
+; CHECK-ANDROID-NEXT:    push {r4, r8, r11, lr}
+; CHECK-ANDROID-NEXT:    .pad #16
+; CHECK-ANDROID-NEXT:    sub sp, sp, #16
+; CHECK-ANDROID-NEXT:    mov r4, r0
+; CHECK-ANDROID-NEXT:    mov r0, #11
+; CHECK-ANDROID-NEXT:    str r0, [sp, #4]
+; CHECK-ANDROID-NEXT:    mov r0, #10
+; CHECK-ANDROID-NEXT:    str r0, [sp, #8]
+; CHECK-ANDROID-NEXT:    mov r0, #12
+; CHECK-ANDROID-NEXT:    str r0, [sp]
+; CHECK-ANDROID-NEXT:    mov r8, #0
+; CHECK-ANDROID-NEXT:    mov r0, #10
+; CHECK-ANDROID-NEXT:    mov r1, #11
+; CHECK-ANDROID-NEXT:    mov r2, #12
+; CHECK-ANDROID-NEXT:    bl foo_vararg
+; CHECK-ANDROID-NEXT:    mov r0, r8
+; CHECK-ANDROID-NEXT:    cmp r8, #0
+; CHECK-ANDROID-NEXT:    ldrbeq r1, [r0, #8]
+; CHECK-ANDROID-NEXT:    strbeq r1, [r4]
+; CHECK-ANDROID-NEXT:    bl free
+; CHECK-ANDROID-NEXT:    mov r0, #1065353216
+; CHECK-ANDROID-NEXT:    add sp, sp, #16
+; CHECK-ANDROID-NEXT:    pop {r4, r8, r11, pc}
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r0, #8]
-; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: bl {{.*}}free
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr_ref
@@ -379,63 +920,212 @@ handler:
 
 ; Check that we don't blow up on tail calling swifterror argument functions.
 define float @tailcallswifterror(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-APPLE-LABEL: tailcallswifterror:
+; CHECK-APPLE:       @ %bb.0: @ %entry
+; CHECK-APPLE-NEXT:    push {lr}
+; CHECK-APPLE-NEXT:    bl _tailcallswifterror
+; CHECK-APPLE-NEXT:    pop {lr}
+; CHECK-APPLE-NEXT:    bx lr
+;
+; CHECK-O0-LABEL: tailcallswifterror:
+; CHECK-O0:       @ %bb.0: @ %entry
+; CHECK-O0-NEXT:    push {r7, lr}
+; CHECK-O0-NEXT:    mov r7, sp
+; CHECK-O0-NEXT:    bl _tailcallswifterror
+; CHECK-O0-NEXT:    pop {r7, pc}
+;
+; CHECK-ANDROID-LABEL: tailcallswifterror:
+; CHECK-ANDROID:       @ %bb.0: @ %entry
+; CHECK-ANDROID-NEXT:    .save {r11, lr}
+; CHECK-ANDROID-NEXT:    push {r11, lr}
+; CHECK-ANDROID-NEXT:    bl tailcallswifterror
+; CHECK-ANDROID-NEXT:    pop {r11, pc}
 entry:
   %0 = tail call float @tailcallswifterror(%swift_error** swifterror %error_ptr_ref)
   ret float %0
 }
 define swiftcc float @tailcallswifterror_swiftcc(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-APPLE-LABEL: tailcallswifterror_swiftcc:
+; CHECK-APPLE:       @ %bb.0: @ %entry
+; CHECK-APPLE-NEXT:    push {lr}
+; CHECK-APPLE-NEXT:    bl _tailcallswifterror_swiftcc
+; CHECK-APPLE-NEXT:    pop {lr}
+; CHECK-APPLE-NEXT:    bx lr
+;
+; CHECK-O0-LABEL: tailcallswifterror_swiftcc:
+; CHECK-O0:       @ %bb.0: @ %entry
+; CHECK-O0-NEXT:    push {r7, lr}
+; CHECK-O0-NEXT:    mov r7, sp
+; CHECK-O0-NEXT:    bl _tailcallswifterror_swiftcc
+; CHECK-O0-NEXT:    pop {r7, pc}
+;
+; CHECK-ANDROID-LABEL: tailcallswifterror_swiftcc:
+; CHECK-ANDROID:       @ %bb.0: @ %entry
+; CHECK-ANDROID-NEXT:    .save {r11, lr}
+; CHECK-ANDROID-NEXT:    push {r11, lr}
+; CHECK-ANDROID-NEXT:    bl tailcallswifterror_swiftcc
+; CHECK-ANDROID-NEXT:    pop {r11, pc}
 entry:
   %0 = tail call swiftcc float @tailcallswifterror_swiftcc(%swift_error** swifterror %error_ptr_ref)
   ret float %0
 }
 
-; CHECK-APPLE-LABEL: swifterror_clobber
-; CHECK-APPLE: mov [[REG:r[0-9]+]], r8
-; CHECK-APPLE: nop
-; CHECK-APPLE: mov r8, [[REG]]
 define swiftcc void @swifterror_clobber(%swift_error** nocapture swifterror %err) {
+; CHECK-APPLE-LABEL: swifterror_clobber:
+; CHECK-APPLE:       @ %bb.0:
+; CHECK-APPLE-NEXT:    mov r0, r8
+; CHECK-APPLE-NEXT:    @ InlineAsm Start
+; CHECK-APPLE-NEXT:    nop
+; CHECK-APPLE-NEXT:    @ InlineAsm End
+; CHECK-APPLE-NEXT:    mov r8, r0
+; CHECK-APPLE-NEXT:    bx lr
+;
+; CHECK-O0-LABEL: swifterror_clobber:
+; CHECK-O0:       @ %bb.0:
+; CHECK-O0-NEXT:    sub sp, sp, #4
+; CHECK-O0-NEXT:    str r8, [sp] @ 4-byte Spill
+; CHECK-O0-NEXT:    @ InlineAsm Start
+; CHECK-O0-NEXT:    nop
+; CHECK-O0-NEXT:    @ InlineAsm End
+; CHECK-O0-NEXT:    ldr r8, [sp] @ 4-byte Reload
+; CHECK-O0-NEXT:    add sp, sp, #4
+; CHECK-O0-NEXT:    bx lr
+;
+; CHECK-ANDROID-LABEL: swifterror_clobber:
+; CHECK-ANDROID:       @ %bb.0:
+; CHECK-ANDROID-NEXT:    mov r0, r8
+; CHECK-ANDROID-NEXT:    @APP
+; CHECK-ANDROID-NEXT:    nop
+; CHECK-ANDROID-NEXT:    @NO_APP
+; CHECK-ANDROID-NEXT:    mov r8, r0
+; CHECK-ANDROID-NEXT:    bx lr
   call void asm sideeffect "nop", "~{r8}"()
   ret void
 }
 
-; CHECK-APPLE-LABEL: swifterror_reg_clobber
-; CHECK-APPLE: push {{.*}}r8
-; CHECK-APPLE: nop
-; CHECK-APPLE: pop  {{.*}}r8
 define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) {
+; CHECK-APPLE-LABEL: swifterror_reg_clobber:
+; CHECK-APPLE:       @ %bb.0:
+; CHECK-APPLE-NEXT:    push {r8, lr}
+; CHECK-APPLE-NEXT:    @ InlineAsm Start
+; CHECK-APPLE-NEXT:    nop
+; CHECK-APPLE-NEXT:    @ InlineAsm End
+; CHECK-APPLE-NEXT:    pop {r8, pc}
+;
+; CHECK-O0-LABEL: swifterror_reg_clobber:
+; CHECK-O0:       @ %bb.0:
+; CHECK-O0-NEXT:    push {r7, lr}
+; CHECK-O0-NEXT:    mov r7, sp
+; CHECK-O0-NEXT:    push {r8}
+; CHECK-O0-NEXT:    @ InlineAsm Start
+; CHECK-O0-NEXT:    nop
+; CHECK-O0-NEXT:    @ InlineAsm End
+; CHECK-O0-NEXT:    pop {r8}
+; CHECK-O0-NEXT:    pop {r7, pc}
+;
+; CHECK-ANDROID-LABEL: swifterror_reg_clobber:
+; CHECK-ANDROID:       @ %bb.0:
+; CHECK-ANDROID-NEXT:    .save {r8, lr}
+; CHECK-ANDROID-NEXT:    push {r8, lr}
+; CHECK-ANDROID-NEXT:    @APP
+; CHECK-ANDROID-NEXT:    nop
+; CHECK-ANDROID-NEXT:    @NO_APP
+; CHECK-ANDROID-NEXT:    pop {r8, pc}
   call void asm sideeffect "nop", "~{r8}"()
   ret void
 }
 
-; CHECK-ARMV7-LABEL: _params_in_reg
-; Store callee saved registers excluding swifterror.
-; CHECK-ARMV7:  push   {r4, r5, r6, r7, r10, r11, lr}
-; Store swiftself (r10) and swifterror (r8).
-; CHECK-ARMV7-DAG:  str     r8, [s[[STK1:.*]]]
-; CHECK-ARMV7-DAG:  str     r10, [s[[STK2:.*]]]
-; Store arguments.
-; CHECK-ARMV7-DAG:  mov     r6, r3
-; CHECK-ARMV7-DAG:  mov     r4, r2
-; CHECK-ARMV7-DAG:  mov     r11, r1
-; CHECK-ARMV7-DAG:  mov     r5, r0
-; Setup call.
-; CHECK-ARMV7:  mov     r0, #1
-; CHECK-ARMV7:  mov     r1, #2
-; CHECK-ARMV7:  mov     r2, #3
-; CHECK-ARMV7:  mov     r3, #4
-; CHECK-ARMV7:  mov     r10, #0
-; CHECK-ARMV7:  mov     r8, #0
-; CHECK-ARMV7:  bl      _params_in_reg2
-; Restore original arguments.
-; CHECK-ARMV7-DAG:  ldr     r10, [s[[STK2]]]
-; CHECK-ARMV7-DAG:  ldr     r8, [s[[STK1]]]
-; CHECK-ARMV7-DAG:  mov     r0, r5
-; CHECK-ARMV7-DAG:  mov     r1, r11
-; CHECK-ARMV7-DAG:  mov     r2, r4
-; CHECK-ARMV7-DAG:  mov     r3, r6
-; CHECK-ARMV7:  bl      _params_in_reg2
-; CHECK-ARMV7:  pop     {r4, r5, r6, r7, r10, r11, pc}
 define swiftcc void @params_in_reg(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err) {
+; CHECK-APPLE-LABEL: params_in_reg:
+; CHECK-APPLE:       @ %bb.0:
+; CHECK-APPLE-NEXT:    push {r4, r5, r6, r7, r10, r11, lr}
+; CHECK-APPLE-NEXT:    add r7, sp, #20
+; CHECK-APPLE-NEXT:    sub sp, sp, #12
+; CHECK-APPLE-NEXT:    bfc sp, #0, #3
+; CHECK-APPLE-NEXT:    str r8, [sp, #4] @ 4-byte Spill
+; CHECK-APPLE-NEXT:    mov r6, r3
+; CHECK-APPLE-NEXT:    str r10, [sp] @ 4-byte Spill
+; CHECK-APPLE-NEXT:    mov r4, r2
+; CHECK-APPLE-NEXT:    mov r11, r1
+; CHECK-APPLE-NEXT:    mov r5, r0
+; CHECK-APPLE-NEXT:    mov r0, #1
+; CHECK-APPLE-NEXT:    mov r1, #2
+; CHECK-APPLE-NEXT:    mov r2, #3
+; CHECK-APPLE-NEXT:    mov r3, #4
+; CHECK-APPLE-NEXT:    mov r10, #0
+; CHECK-APPLE-NEXT:    mov r8, #0
+; CHECK-APPLE-NEXT:    bl _params_in_reg2
+; CHECK-APPLE-NEXT:    ldr r10, [sp] @ 4-byte Reload
+; CHECK-APPLE-NEXT:    mov r0, r5
+; CHECK-APPLE-NEXT:    ldr r8, [sp, #4] @ 4-byte Reload
+; CHECK-APPLE-NEXT:    mov r1, r11
+; CHECK-APPLE-NEXT:    mov r2, r4
+; CHECK-APPLE-NEXT:    mov r3, r6
+; CHECK-APPLE-NEXT:    bl _params_in_reg2
+; CHECK-APPLE-NEXT:    sub sp, r7, #20
+; CHECK-APPLE-NEXT:    pop {r4, r5, r6, r7, r10, r11, pc}
+;
+; CHECK-O0-LABEL: params_in_reg:
+; CHECK-O0:       @ %bb.0:
+; CHECK-O0-NEXT:    push {r7, lr}
+; CHECK-O0-NEXT:    mov r7, sp
+; CHECK-O0-NEXT:    push {r10}
+; CHECK-O0-NEXT:    sub sp, sp, #28
+; CHECK-O0-NEXT:    bfc sp, #0, #3
+; CHECK-O0-NEXT:    str r8, [sp, #20] @ 4-byte Spill
+; CHECK-O0-NEXT:    str r10, [sp] @ 4-byte Spill
+; CHECK-O0-NEXT:    str r3, [sp, #16] @ 4-byte Spill
+; CHECK-O0-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; CHECK-O0-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-O0-NEXT:    str r0, [sp, #4] @ 4-byte Spill
+; CHECK-O0-NEXT:    @ implicit-def: $r0
+; CHECK-O0-NEXT:    mov r8, #0
+; CHECK-O0-NEXT:    mov r0, #1
+; CHECK-O0-NEXT:    mov r1, #2
+; CHECK-O0-NEXT:    mov r2, #3
+; CHECK-O0-NEXT:    mov r3, #4
+; CHECK-O0-NEXT:    mov r10, r8
+; CHECK-O0-NEXT:    bl _params_in_reg2
+; CHECK-O0-NEXT:    ldr r10, [sp] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
+; CHECK-O0-NEXT:    mov r9, r8
+; CHECK-O0-NEXT:    ldr r8, [sp, #20] @ 4-byte Reload
+; CHECK-O0-NEXT:    bl _params_in_reg2
+; CHECK-O0-NEXT:    sub sp, r7, #4
+; CHECK-O0-NEXT:    pop {r10}
+; CHECK-O0-NEXT:    pop {r7, pc}
+;
+; CHECK-ANDROID-LABEL: params_in_reg:
+; CHECK-ANDROID:       @ %bb.0:
+; CHECK-ANDROID-NEXT:    .save {r4, r5, r6, r7, r9, r10, r11, lr}
+; CHECK-ANDROID-NEXT:    push {r4, r5, r6, r7, r9, r10, r11, lr}
+; CHECK-ANDROID-NEXT:    .pad #8
+; CHECK-ANDROID-NEXT:    sub sp, sp, #8
+; CHECK-ANDROID-NEXT:    mov r9, r8
+; CHECK-ANDROID-NEXT:    mov r11, r10
+; CHECK-ANDROID-NEXT:    mov r6, r3
+; CHECK-ANDROID-NEXT:    mov r7, r2
+; CHECK-ANDROID-NEXT:    mov r4, r1
+; CHECK-ANDROID-NEXT:    mov r5, r0
+; CHECK-ANDROID-NEXT:    mov r0, #1
+; CHECK-ANDROID-NEXT:    mov r1, #2
+; CHECK-ANDROID-NEXT:    mov r2, #3
+; CHECK-ANDROID-NEXT:    mov r3, #4
+; CHECK-ANDROID-NEXT:    mov r10, #0
+; CHECK-ANDROID-NEXT:    mov r8, #0
+; CHECK-ANDROID-NEXT:    bl params_in_reg2
+; CHECK-ANDROID-NEXT:    mov r0, r5
+; CHECK-ANDROID-NEXT:    mov r1, r4
+; CHECK-ANDROID-NEXT:    mov r2, r7
+; CHECK-ANDROID-NEXT:    mov r3, r6
+; CHECK-ANDROID-NEXT:    mov r10, r11
+; CHECK-ANDROID-NEXT:    mov r8, r9
+; CHECK-ANDROID-NEXT:    bl params_in_reg2
+; CHECK-ANDROID-NEXT:    add sp, sp, #8
+; CHECK-ANDROID-NEXT:    pop {r4, r5, r6, r7, r9, r10, r11, pc}
   %error_ptr_ref = alloca swifterror %swift_error*, align 8
   store %swift_error* null, %swift_error** %error_ptr_ref
   call swiftcc void @params_in_reg2(i32 1, i32 2, i32 3, i32 4, i8* swiftself null, %swift_error** nocapture swifterror %error_ptr_ref)
@@ -444,105 +1134,162 @@ define swiftcc void @params_in_reg(i32, i32, i32, i32, i8* swiftself, %swift_err
 }
 declare swiftcc void @params_in_reg2(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err)
 
-; CHECK-ARMV7-LABEL: params_and_return_in_reg
-; CHECK-ARMV7:  push    {r4, r5, r6, r7, r10, r11, lr}
-; Store swifterror and swiftself
-; CHECK-ARMV7:  mov     r6, r8
-; CHECK-ARMV7:  str     r10, [s[[STK1:.*]]]
-; Store arguments.
-; CHECK-ARMV7:  str     r3, [s[[STK2:.*]]]
-; CHECK-ARMV7:  mov     r4, r2
-; CHECK-ARMV7:  mov     r11, r1
-; CHECK-ARMV7:  mov     r5, r0
-; Setup call.
-; CHECK-ARMV7:  mov     r0, #1
-; CHECK-ARMV7:  mov     r1, #2
-; CHECK-ARMV7:  mov     r2, #3
-; CHECK-ARMV7:  mov     r3, #4
-; CHECK-ARMV7:  mov     r10, #0
-; CHECK-ARMV7:  mov     r8, #0
-; CHECK-ARMV7:  bl      _params_in_reg2
-; Restore original arguments.
-; CHECK-ARMV7-DAG:  ldr     r3, [s[[STK2]]]
-; CHECK-ARMV7-DAG:  ldr     r10, [s[[STK1]]]
-; Store %error_ptr_ref;
-; CHECK-ARMV7-DAG:  str     r8, [s[[STK3:.*]]]
-; Restore original arguments.
-; CHECK-ARMV7-DAG:  mov     r0, r5
-; CHECK-ARMV7-DAG:  mov     r1, r11
-; CHECK-ARMV7-DAG:  mov     r2, r4
-; CHECK-ARMV7-DAG:  mov     r8, r6
-; CHECK-ARMV7:  bl      _params_and_return_in_reg2
-; Store swifterror return %err;
-; CHECK-ARMV7-DAG:  str     r8, [s[[STK1]]]
-; Load swifterror value %error_ptr_ref.
-; CHECK-ARMV7-DAG:  ldr     r8, [s[[STK3]]]
-; Save return values.
-; CHECK-ARMV7-DAG:  mov     r4, r0
-; CHECK-ARMV7-DAG:  mov     r5, r1
-; CHECK-ARMV7-DAG:  mov     r6, r2
-; CHECK-ARMV7-DAG:  mov     r11, r3
-; Setup call.
-; CHECK-ARMV7:  mov     r0, #1
-; CHECK-ARMV7:  mov     r1, #2
-; CHECK-ARMV7:  mov     r2, #3
-; CHECK-ARMV7:  mov     r3, #4
-; CHECK-ARMV7:  mov     r10, #0
-; CHECK-ARMV7:  bl      _params_in_reg2
-; Load swifterror %err;
-; CHECK-ARMV7-DAG:  ldr     r8, [s[[STK1]]]
-; Restore return values for returning.
-; CHECK-ARMV7-DAG:  mov     r0, r4
-; CHECK-ARMV7-DAG:  mov     r1, r5
-; CHECK-ARMV7-DAG:  mov     r2, r6
-; CHECK-ARMV7-DAG:  mov     r3, r11
-; CHECK-ARMV7:  pop     {r4, r5, r6, r7, r10, r11, pc}
-
-; CHECK-ANDROID-LABEL: params_and_return_in_reg
-; CHECK-ANDROID:  push    {r4, r5, r6, r7, r9, r10, r11, lr}
-; CHECK-ANDROID:  sub     sp, sp, #16
-; CHECK-ANDROID:  str     r8, [sp, #4]            @ 4-byte Spill
-; CHECK-ANDROID:  mov     r11, r10
-; CHECK-ANDROID:  mov     r6, r3
-; CHECK-ANDROID:  mov     r7, r2
-; CHECK-ANDROID:  mov     r4, r1
-; CHECK-ANDROID:  mov     r5, r0
-; CHECK-ANDROID:  mov     r0, #1
-; CHECK-ANDROID:  mov     r1, #2
-; CHECK-ANDROID:  mov     r2, #3
-; CHECK-ANDROID:  mov     r3, #4
-; CHECK-ANDROID:  mov     r10, #0
-; CHECK-ANDROID:  mov     r8, #0
-; CHECK-ANDROID:  bl      params_in_reg2
-; CHECK-ANDROID:  mov     r9, r8
-; CHECK-ANDROID:  ldr     r8, [sp, #4]            @ 4-byte Reload
-; CHECK-ANDROID:  mov     r0, r5
-; CHECK-ANDROID:  mov     r1, r4
-; CHECK-ANDROID:  mov     r2, r7
-; CHECK-ANDROID:  mov     r3, r6
-; CHECK-ANDROID:  mov     r10, r11
-; CHECK-ANDROID:  bl      params_and_return_in_reg2
-; CHECK-ANDROID:  mov     r4, r0
-; CHECK-ANDROID:  mov     r5, r1
-; CHECK-ANDROID:  mov     r6, r2
-; CHECK-ANDROID:  mov     r7, r3
-; CHECK-ANDROID:  mov     r11, r8
-; CHECK-ANDROID:  mov     r0, #1
-; CHECK-ANDROID:  mov     r1, #2
-; CHECK-ANDROID:  mov     r2, #3
-; CHECK-ANDROID:  mov     r3, #4
-; CHECK-ANDROID:  mov     r10, #0
-; CHECK-ANDROID:  mov     r8, r9
-; CHECK-ANDROID:  bl      params_in_reg2
-; CHECK-ANDROID:  mov     r0, r4
-; CHECK-ANDROID:  mov     r1, r5
-; CHECK-ANDROID:  mov     r2, r6
-; CHECK-ANDROID:  mov     r3, r7
-; CHECK-ANDROID:  mov     r8, r11
-; CHECK-ANDROID:  add     sp, sp, #16
-; CHECK-ANDROID:  pop	{r4, r5, r6, r7, r9, r10, r11, pc}
-
 define swiftcc { i32, i32, i32, i32} @params_and_return_in_reg(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err) {
+; CHECK-APPLE-LABEL: params_and_return_in_reg:
+; CHECK-APPLE:       @ %bb.0:
+; CHECK-APPLE-NEXT:    push {r4, r5, r6, r7, r10, r11, lr}
+; CHECK-APPLE-NEXT:    add r7, sp, #20
+; CHECK-APPLE-NEXT:    sub sp, sp, #20
+; CHECK-APPLE-NEXT:    bfc sp, #0, #3
+; CHECK-APPLE-NEXT:    mov r6, r8
+; CHECK-APPLE-NEXT:    str r10, [sp, #12] @ 4-byte Spill
+; CHECK-APPLE-NEXT:    str r3, [sp, #8] @ 4-byte Spill
+; CHECK-APPLE-NEXT:    mov r4, r2
+; CHECK-APPLE-NEXT:    mov r11, r1
+; CHECK-APPLE-NEXT:    mov r5, r0
+; CHECK-APPLE-NEXT:    mov r0, #1
+; CHECK-APPLE-NEXT:    mov r1, #2
+; CHECK-APPLE-NEXT:    mov r2, #3
+; CHECK-APPLE-NEXT:    mov r3, #4
+; CHECK-APPLE-NEXT:    mov r10, #0
+; CHECK-APPLE-NEXT:    mov r8, #0
+; CHECK-APPLE-NEXT:    bl _params_in_reg2
+; CHECK-APPLE-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
+; CHECK-APPLE-NEXT:    mov r0, r5
+; CHECK-APPLE-NEXT:    ldr r10, [sp, #12] @ 4-byte Reload
+; CHECK-APPLE-NEXT:    mov r1, r11
+; CHECK-APPLE-NEXT:    str r8, [sp, #4] @ 4-byte Spill
+; CHECK-APPLE-NEXT:    mov r2, r4
+; CHECK-APPLE-NEXT:    mov r8, r6
+; CHECK-APPLE-NEXT:    bl _params_and_return_in_reg2
+; CHECK-APPLE-NEXT:    str r8, [sp, #12] @ 4-byte Spill
+; CHECK-APPLE-NEXT:    mov r4, r0
+; CHECK-APPLE-NEXT:    ldr r8, [sp, #4] @ 4-byte Reload
+; CHECK-APPLE-NEXT:    mov r5, r1
+; CHECK-APPLE-NEXT:    mov r6, r2
+; CHECK-APPLE-NEXT:    mov r11, r3
+; CHECK-APPLE-NEXT:    mov r0, #1
+; CHECK-APPLE-NEXT:    mov r1, #2
+; CHECK-APPLE-NEXT:    mov r2, #3
+; CHECK-APPLE-NEXT:    mov r3, #4
+; CHECK-APPLE-NEXT:    mov r10, #0
+; CHECK-APPLE-NEXT:    bl _params_in_reg2
+; CHECK-APPLE-NEXT:    mov r0, r4
+; CHECK-APPLE-NEXT:    mov r1, r5
+; CHECK-APPLE-NEXT:    mov r2, r6
+; CHECK-APPLE-NEXT:    mov r3, r11
+; CHECK-APPLE-NEXT:    ldr r8, [sp, #12] @ 4-byte Reload
+; CHECK-APPLE-NEXT:    sub sp, r7, #20
+; CHECK-APPLE-NEXT:    pop {r4, r5, r6, r7, r10, r11, pc}
+;
+; CHECK-O0-LABEL: params_and_return_in_reg:
+; CHECK-O0:       @ %bb.0:
+; CHECK-O0-NEXT:    push {r7, lr}
+; CHECK-O0-NEXT:    mov r7, sp
+; CHECK-O0-NEXT:    push {r10}
+; CHECK-O0-NEXT:    sub sp, sp, #76
+; CHECK-O0-NEXT:    bfc sp, #0, #3
+; CHECK-O0-NEXT:    str r8, [sp, #24] @ 4-byte Spill
+; CHECK-O0-NEXT:    str r10, [sp, #4] @ 4-byte Spill
+; CHECK-O0-NEXT:    str r3, [sp, #20] @ 4-byte Spill
+; CHECK-O0-NEXT:    str r2, [sp, #16] @ 4-byte Spill
+; CHECK-O0-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; CHECK-O0-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-O0-NEXT:    @ implicit-def: $r0
+; CHECK-O0-NEXT:    mov r8, #0
+; CHECK-O0-NEXT:    str r8, [sp, #28] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r0, #1
+; CHECK-O0-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r1, #2
+; CHECK-O0-NEXT:    str r1, [sp, #36] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r2, #3
+; CHECK-O0-NEXT:    str r2, [sp, #40] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r3, #4
+; CHECK-O0-NEXT:    str r3, [sp, #44] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r10, r8
+; CHECK-O0-NEXT:    bl _params_in_reg2
+; CHECK-O0-NEXT:    ldr r10, [sp, #4] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
+; CHECK-O0-NEXT:    mov r9, r8
+; CHECK-O0-NEXT:    ldr r8, [sp, #24] @ 4-byte Reload
+; CHECK-O0-NEXT:    str r9, [sp, #48] @ 4-byte Spill
+; CHECK-O0-NEXT:    bl _params_and_return_in_reg2
+; CHECK-O0-NEXT:    ldr r10, [sp, #28] @ 4-byte Reload
+; CHECK-O0-NEXT:    mov r9, r0
+; CHECK-O0-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
+; CHECK-O0-NEXT:    str r9, [sp, #52] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r9, r1
+; CHECK-O0-NEXT:    ldr r1, [sp, #36] @ 4-byte Reload
+; CHECK-O0-NEXT:    str r9, [sp, #56] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r9, r2
+; CHECK-O0-NEXT:    ldr r2, [sp, #40] @ 4-byte Reload
+; CHECK-O0-NEXT:    str r9, [sp, #60] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r9, r3
+; CHECK-O0-NEXT:    ldr r3, [sp, #44] @ 4-byte Reload
+; CHECK-O0-NEXT:    str r9, [sp, #64] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r9, r8
+; CHECK-O0-NEXT:    ldr r8, [sp, #48] @ 4-byte Reload
+; CHECK-O0-NEXT:    str r9, [sp, #68] @ 4-byte Spill
+; CHECK-O0-NEXT:    bl _params_in_reg2
+; CHECK-O0-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r1, [sp, #56] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r2, [sp, #60] @ 4-byte Reload
+; CHECK-O0-NEXT:    ldr r3, [sp, #64] @ 4-byte Reload
+; CHECK-O0-NEXT:    mov r9, r8
+; CHECK-O0-NEXT:    ldr r8, [sp, #68] @ 4-byte Reload
+; CHECK-O0-NEXT:    sub sp, r7, #4
+; CHECK-O0-NEXT:    pop {r10}
+; CHECK-O0-NEXT:    pop {r7, pc}
+;
+; CHECK-ANDROID-LABEL: params_and_return_in_reg:
+; CHECK-ANDROID:       @ %bb.0:
+; CHECK-ANDROID-NEXT:    .save {r4, r5, r6, r7, r9, r10, r11, lr}
+; CHECK-ANDROID-NEXT:    push {r4, r5, r6, r7, r9, r10, r11, lr}
+; CHECK-ANDROID-NEXT:    .pad #16
+; CHECK-ANDROID-NEXT:    sub sp, sp, #16
+; CHECK-ANDROID-NEXT:    str r8, [sp, #4] @ 4-byte Spill
+; CHECK-ANDROID-NEXT:    mov r11, r10
+; CHECK-ANDROID-NEXT:    mov r6, r3
+; CHECK-ANDROID-NEXT:    mov r7, r2
+; CHECK-ANDROID-NEXT:    mov r4, r1
+; CHECK-ANDROID-NEXT:    mov r5, r0
+; CHECK-ANDROID-NEXT:    mov r0, #1
+; CHECK-ANDROID-NEXT:    mov r1, #2
+; CHECK-ANDROID-NEXT:    mov r2, #3
+; CHECK-ANDROID-NEXT:    mov r3, #4
+; CHECK-ANDROID-NEXT:    mov r10, #0
+; CHECK-ANDROID-NEXT:    mov r8, #0
+; CHECK-ANDROID-NEXT:    bl params_in_reg2
+; CHECK-ANDROID-NEXT:    mov r9, r8
+; CHECK-ANDROID-NEXT:    ldr r8, [sp, #4] @ 4-byte Reload
+; CHECK-ANDROID-NEXT:    mov r0, r5
+; CHECK-ANDROID-NEXT:    mov r1, r4
+; CHECK-ANDROID-NEXT:    mov r2, r7
+; CHECK-ANDROID-NEXT:    mov r3, r6
+; CHECK-ANDROID-NEXT:    mov r10, r11
+; CHECK-ANDROID-NEXT:    bl params_and_return_in_reg2
+; CHECK-ANDROID-NEXT:    mov r4, r0
+; CHECK-ANDROID-NEXT:    mov r5, r1
+; CHECK-ANDROID-NEXT:    mov r6, r2
+; CHECK-ANDROID-NEXT:    mov r7, r3
+; CHECK-ANDROID-NEXT:    mov r11, r8
+; CHECK-ANDROID-NEXT:    mov r0, #1
+; CHECK-ANDROID-NEXT:    mov r1, #2
+; CHECK-ANDROID-NEXT:    mov r2, #3
+; CHECK-ANDROID-NEXT:    mov r3, #4
+; CHECK-ANDROID-NEXT:    mov r10, #0
+; CHECK-ANDROID-NEXT:    mov r8, r9
+; CHECK-ANDROID-NEXT:    bl params_in_reg2
+; CHECK-ANDROID-NEXT:    mov r0, r4
+; CHECK-ANDROID-NEXT:    mov r1, r5
+; CHECK-ANDROID-NEXT:    mov r2, r6
+; CHECK-ANDROID-NEXT:    mov r3, r7
+; CHECK-ANDROID-NEXT:    mov r8, r11
+; CHECK-ANDROID-NEXT:    add sp, sp, #16
+; CHECK-ANDROID-NEXT:    pop {r4, r5, r6, r7, r9, r10, r11, pc}
   %error_ptr_ref = alloca swifterror %swift_error*, align 8
   store %swift_error* null, %swift_error** %error_ptr_ref
   call swiftcc void @params_in_reg2(i32 1, i32 2, i32 3, i32 4, i8* swiftself null, %swift_error** nocapture swifterror %error_ptr_ref)
@@ -558,14 +1305,37 @@ declare void @acallee(i8*)
 
 ; Make sure we don't tail call if the caller returns a swifterror value. We
 ; would have to move into the swifterror register before the tail call.
-; CHECK-APPLE: tailcall_from_swifterror:
-; CHECK-APPLE-NOT: b _acallee
-; CHECK-APPLE: bl _acallee
-; CHECK-ANDROID: tailcall_from_swifterror:
-; CHECK-ANDROID-NOT: b acallee
-; CHECK-ANDROID: bl acallee
-
 define swiftcc void @tailcall_from_swifterror(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-APPLE-LABEL: tailcall_from_swifterror:
+; CHECK-APPLE:       @ %bb.0: @ %entry
+; CHECK-APPLE-NEXT:    push {r4, lr}
+; CHECK-APPLE-NEXT:    mov r0, #0
+; CHECK-APPLE-NEXT:    mov r4, r8
+; CHECK-APPLE-NEXT:    bl _acallee
+; CHECK-APPLE-NEXT:    mov r8, r4
+; CHECK-APPLE-NEXT:    pop {r4, pc}
+;
+; CHECK-O0-LABEL: tailcall_from_swifterror:
+; CHECK-O0:       @ %bb.0: @ %entry
+; CHECK-O0-NEXT:    push {r7, lr}
+; CHECK-O0-NEXT:    mov r7, sp
+; CHECK-O0-NEXT:    sub sp, sp, #4
+; CHECK-O0-NEXT:    str r8, [sp] @ 4-byte Spill
+; CHECK-O0-NEXT:    mov r0, #0
+; CHECK-O0-NEXT:    bl _acallee
+; CHECK-O0-NEXT:    ldr r8, [sp] @ 4-byte Reload
+; CHECK-O0-NEXT:    mov sp, r7
+; CHECK-O0-NEXT:    pop {r7, pc}
+;
+; CHECK-ANDROID-LABEL: tailcall_from_swifterror:
+; CHECK-ANDROID:       @ %bb.0: @ %entry
+; CHECK-ANDROID-NEXT:    .save {r4, lr}
+; CHECK-ANDROID-NEXT:    push {r4, lr}
+; CHECK-ANDROID-NEXT:    mov r0, #0
+; CHECK-ANDROID-NEXT:    mov r4, r8
+; CHECK-ANDROID-NEXT:    bl acallee
+; CHECK-ANDROID-NEXT:    mov r8, r4
+; CHECK-ANDROID-NEXT:    pop {r4, pc}
 entry:
   tail call void @acallee(i8* null)
   ret void
@@ -575,24 +1345,44 @@ entry:
 declare swiftcc void @foo2(%swift_error** swifterror)
 
 ; Make sure we properly assign registers during fast-isel.
-; CHECK-O0-LABEL: testAssign
-; CHECK-O0: mov     r8, #0
-; CHECK-O0: bl      _foo2
-; CHECK-O0: str     r8, [s[[STK:p.*]]]
-; CHECK-O0: ldr     r0, [s[[STK]]]
-; CHECK-O0: pop
-
-; CHECK-APPLE-LABEL: testAssign
-; CHECK-APPLE:  mov     r8, #0
-; CHECK-APPLE:  bl      _foo2
-; CHECK-APPLE:  mov     r0, r8
-
-; CHECK-ANDROID-LABEL: testAssign
-; CHECK-ANDROID:  mov     r8, #0
-; CHECK-ANDROID:  bl      foo2
-; CHECK-ANDROID:  mov     r0, r8
-
 define swiftcc %swift_error* @testAssign(i8* %error_ref) {
+; CHECK-APPLE-LABEL: testAssign:
+; CHECK-APPLE:       @ %bb.0: @ %entry
+; CHECK-APPLE-NEXT:    push {r8, lr}
+; CHECK-APPLE-NEXT:    sub sp, sp, #4
+; CHECK-APPLE-NEXT:    mov r8, #0
+; CHECK-APPLE-NEXT:    bl _foo2
+; CHECK-APPLE-NEXT:    mov r0, r8
+; CHECK-APPLE-NEXT:    add sp, sp, #4
+; CHECK-APPLE-NEXT:    pop {r8, pc}
+;
+; CHECK-O0-LABEL: testAssign:
+; CHECK-O0:       @ %bb.0: @ %entry
+; CHECK-O0-NEXT:    push {r7, lr}
+; CHECK-O0-NEXT:    mov r7, sp
+; CHECK-O0-NEXT:    push {r8}
+; CHECK-O0-NEXT:    sub sp, sp, #8
+; CHECK-O0-NEXT:    @ implicit-def: $r1
+; CHECK-O0-NEXT:    mov r8, #0
+; CHECK-O0-NEXT:    bl _foo2
+; CHECK-O0-NEXT:    str r8, [sp] @ 4-byte Spill
+; CHECK-O0-NEXT:  @ %bb.1: @ %a
+; CHECK-O0-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; CHECK-O0-NEXT:    sub sp, r7, #4
+; CHECK-O0-NEXT:    pop {r8}
+; CHECK-O0-NEXT:    pop {r7, pc}
+;
+; CHECK-ANDROID-LABEL: testAssign:
+; CHECK-ANDROID:       @ %bb.0: @ %entry
+; CHECK-ANDROID-NEXT:    .save {r8, lr}
+; CHECK-ANDROID-NEXT:    push {r8, lr}
+; CHECK-ANDROID-NEXT:    .pad #8
+; CHECK-ANDROID-NEXT:    sub sp, sp, #8
+; CHECK-ANDROID-NEXT:    mov r8, #0
+; CHECK-ANDROID-NEXT:    bl foo2
+; CHECK-ANDROID-NEXT:    mov r0, r8
+; CHECK-ANDROID-NEXT:    add sp, sp, #8
+; CHECK-ANDROID-NEXT:    pop {r8, pc}
 entry:
   %error_ptr = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr

diff  --git a/llvm/test/CodeGen/X86/swifterror.ll b/llvm/test/CodeGen/X86/swifterror.ll
index ac23473b8eccd..e342f33fb2194 100644
--- a/llvm/test/CodeGen/X86/swifterror.ll
+++ b/llvm/test/CodeGen/X86/swifterror.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp
 ; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-apple-darwin -disable-block-placement | FileCheck --check-prefix=CHECK-APPLE %s
 ; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=x86_64-apple-darwin -disable-block-placement | FileCheck --check-prefix=CHECK-O0 %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=i386-apple-darwin -disable-block-placement | FileCheck --check-prefix=CHECK-i386 %s
@@ -10,16 +11,46 @@ declare void @free(i8*)
 ; that takes a swifterror parameter and "caller" is the caller of "foo".
 define float @foo(%swift_error** swifterror %error_ptr_ref) {
 ; CHECK-APPLE-LABEL: foo:
-; CHECK-APPLE: movl $16, %edi
-; CHECK-APPLE: malloc
-; CHECK-APPLE: movb $1, 8(%rax)
-; CHECK-APPLE: movq %rax, %r12
-
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %rax
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    movl $16, %edi
+; CHECK-APPLE-NEXT:    callq _malloc
+; CHECK-APPLE-NEXT:    movb $1, 8(%rax)
+; CHECK-APPLE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-APPLE-NEXT:    movq %rax, %r12
+; CHECK-APPLE-NEXT:    popq %rax
+; CHECK-APPLE-NEXT:    retq
+;
 ; CHECK-O0-LABEL: foo:
-; CHECK-O0: movl $16
-; CHECK-O0: malloc
-; CHECK-O0: movq %{{.*}}, %r12
-; CHECK-O0: movb $1, 8(%rax)
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    movl $16, %edi
+; CHECK-O0-NEXT:    callq _malloc
+; CHECK-O0-NEXT:    movq %rax, %r12
+; CHECK-O0-NEXT:    movb $1, 8(%rax)
+; CHECK-O0-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: foo:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    pushl %esi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-i386-NEXT:    subl $8, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i386-NEXT:    .cfi_offset %esi, -8
+; CHECK-i386-NEXT:    movl 16(%esp), %esi
+; CHECK-i386-NEXT:    movl $0, 4(%esp)
+; CHECK-i386-NEXT:    movl $16, (%esp)
+; CHECK-i386-NEXT:    calll _malloc
+; CHECK-i386-NEXT:    movl %eax, (%esi)
+; CHECK-i386-NEXT:    movb $1, 8(%eax)
+; CHECK-i386-NEXT:    fld1
+; CHECK-i386-NEXT:    addl $8, %esp
+; CHECK-i386-NEXT:    popl %esi
+; CHECK-i386-NEXT:    retl
 
 entry:
   %call = call i8* @malloc(i64 16)
@@ -33,19 +64,86 @@ entry:
 ; "caller" calls "foo" that takes a swifterror parameter.
 define float @caller(i8* %error_ref) {
 ; CHECK-APPLE-LABEL: caller:
-; CHECK-APPLE: xorl %r12d, %r12d
-; CHECK-APPLE: callq {{.*}}foo
-; CHECK-APPLE: testq %r12, %r12
-; CHECK-APPLE: jne
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %r12
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    pushq %rbx
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-APPLE-NEXT:    pushq %rax
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-APPLE-NEXT:    .cfi_offset %rbx, -24
+; CHECK-APPLE-NEXT:    .cfi_offset %r12, -16
+; CHECK-APPLE-NEXT:    movq %rdi, %rbx
+; CHECK-APPLE-NEXT:    xorl %r12d, %r12d
+; CHECK-APPLE-NEXT:    callq _foo
+; CHECK-APPLE-NEXT:    movq %r12, %rdi
+; CHECK-APPLE-NEXT:    testq %r12, %r12
+; CHECK-APPLE-NEXT:    jne LBB1_2
+; CHECK-APPLE-NEXT:  ## %bb.1: ## %cont
+; CHECK-APPLE-NEXT:    movb 8(%rdi), %al
+; CHECK-APPLE-NEXT:    movb %al, (%rbx)
+; CHECK-APPLE-NEXT:  LBB1_2: ## %handler
+; CHECK-APPLE-NEXT:    callq _free
+; CHECK-APPLE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-APPLE-NEXT:    addq $8, %rsp
+; CHECK-APPLE-NEXT:    popq %rbx
+; CHECK-APPLE-NEXT:    popq %r12
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: caller:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %r12
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    subq $32, %rsp
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-O0-NEXT:    .cfi_offset %r12, -16
+; CHECK-O0-NEXT:    ## implicit-def: $rax
+; CHECK-O0-NEXT:    movq %rdi, (%rsp) ## 8-byte Spill
+; CHECK-O0-NEXT:    xorl %eax, %eax
+; CHECK-O0-NEXT:    movl %eax, %r12d
+; CHECK-O0-NEXT:    callq _foo
+; CHECK-O0-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %r12, %rax
+; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    cmpq $0, %r12
+; CHECK-O0-NEXT:    jne LBB1_2
+; CHECK-O0-NEXT:  ## %bb.1: ## %cont
+; CHECK-O0-NEXT:    movq (%rsp), %rax ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
+; CHECK-O0-NEXT:    movb 8(%rcx), %cl
+; CHECK-O0-NEXT:    movb %cl, (%rax)
+; CHECK-O0-NEXT:  LBB1_2: ## %handler
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
+; CHECK-O0-NEXT:    callq _free
+; CHECK-O0-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT:    addq $32, %rsp
+; CHECK-O0-NEXT:    popq %r12
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: caller:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    subl $12, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i386-NEXT:    movl $0, 8(%esp)
+; CHECK-i386-NEXT:    leal 8(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, (%esp)
+; CHECK-i386-NEXT:    calll _foo
+; CHECK-i386-NEXT:    fstp %st(0)
+; CHECK-i386-NEXT:    movl 8(%esp), %eax
+; CHECK-i386-NEXT:    testl %eax, %eax
+; CHECK-i386-NEXT:    jne LBB1_2
+; CHECK-i386-NEXT:  ## %bb.1: ## %cont
+; CHECK-i386-NEXT:    movl 16(%esp), %ecx
+; CHECK-i386-NEXT:    movb 8(%eax), %dl
+; CHECK-i386-NEXT:    movb %dl, (%ecx)
+; CHECK-i386-NEXT:  LBB1_2: ## %handler
+; CHECK-i386-NEXT:    movl %eax, (%esp)
+; CHECK-i386-NEXT:    calll _free
+; CHECK-i386-NEXT:    fld1
+; CHECK-i386-NEXT:    addl $12, %esp
+; CHECK-i386-NEXT:    retl
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: movb 8(%rdi)
-; CHECK-APPLE: callq {{.*}}free
 
-; CHECK-O0-LABEL: caller:
-; CHECK-O0: xorl
-; CHECK-O0: movl %{{.*}}, %r12d
-; CHECK-O0: callq {{.*}}foo
-; CHECK-O0: jne
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr_ref
@@ -67,24 +165,124 @@ handler:
 ; "caller2" is the caller of "foo", it calls "foo" inside a loop.
 define float @caller2(i8* %error_ref) {
 ; CHECK-APPLE-LABEL: caller2:
-; CHECK-APPLE: xorl %r12d, %r12d
-; CHECK-APPLE: callq {{.*}}foo
-; CHECK-APPLE: testq %r12, %r12
-; CHECK-APPLE: jne
-; CHECK-APPLE: ucomiss
-; CHECK-APPLE: jbe
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %r12
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    pushq %rbx
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-APPLE-NEXT:    pushq %rax
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-APPLE-NEXT:    .cfi_offset %rbx, -24
+; CHECK-APPLE-NEXT:    .cfi_offset %r12, -16
+; CHECK-APPLE-NEXT:    movq %rdi, %rbx
+; CHECK-APPLE-NEXT:  LBB2_1: ## %bb_loop
+; CHECK-APPLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; CHECK-APPLE-NEXT:    xorl %r12d, %r12d
+; CHECK-APPLE-NEXT:    callq _foo
+; CHECK-APPLE-NEXT:    testq %r12, %r12
+; CHECK-APPLE-NEXT:    jne LBB2_4
+; CHECK-APPLE-NEXT:  ## %bb.2: ## %cont
+; CHECK-APPLE-NEXT:    ## in Loop: Header=BB2_1 Depth=1
+; CHECK-APPLE-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-APPLE-NEXT:    jbe LBB2_1
+; CHECK-APPLE-NEXT:  ## %bb.3: ## %bb_end
+; CHECK-APPLE-NEXT:    movb 8(%r12), %al
+; CHECK-APPLE-NEXT:    movb %al, (%rbx)
+; CHECK-APPLE-NEXT:  LBB2_4: ## %handler
+; CHECK-APPLE-NEXT:    movq %r12, %rdi
+; CHECK-APPLE-NEXT:    callq _free
+; CHECK-APPLE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-APPLE-NEXT:    addq $8, %rsp
+; CHECK-APPLE-NEXT:    popq %rbx
+; CHECK-APPLE-NEXT:    popq %r12
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: caller2:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %r12
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    subq $48, %rsp
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-O0-NEXT:    .cfi_offset %r12, -16
+; CHECK-O0-NEXT:    ## implicit-def: $rax
+; CHECK-O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:  LBB2_1: ## %bb_loop
+; CHECK-O0-NEXT:    ## =>This Inner Loop Header: Depth=1
+; CHECK-O0-NEXT:    xorl %eax, %eax
+; CHECK-O0-NEXT:    movl %eax, %r12d
+; CHECK-O0-NEXT:    callq _foo
+; CHECK-O0-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-O0-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %r12, %rax
+; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    cmpq $0, %r12
+; CHECK-O0-NEXT:    jne LBB2_4
+; CHECK-O0-NEXT:  ## %bb.2: ## %cont
+; CHECK-O0-NEXT:    ## in Loop: Header=BB2_1 Depth=1
+; CHECK-O0-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 4-byte Reload
+; CHECK-O0-NEXT:    ## xmm0 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT:    ucomiss %xmm1, %xmm0
+; CHECK-O0-NEXT:    jbe LBB2_1
+; CHECK-O0-NEXT:  ## %bb.3: ## %bb_end
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
+; CHECK-O0-NEXT:    movb 8(%rcx), %cl
+; CHECK-O0-NEXT:    movb %cl, (%rax)
+; CHECK-O0-NEXT:  LBB2_4: ## %handler
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
+; CHECK-O0-NEXT:    callq _free
+; CHECK-O0-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT:    addq $48, %rsp
+; CHECK-O0-NEXT:    popq %r12
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: caller2:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    pushl %edi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-i386-NEXT:    pushl %esi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-i386-NEXT:    subl $20, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-i386-NEXT:    .cfi_offset %esi, -12
+; CHECK-i386-NEXT:    .cfi_offset %edi, -8
+; CHECK-i386-NEXT:    movl 32(%esp), %esi
+; CHECK-i386-NEXT:    leal 16(%esp), %edi
+; CHECK-i386-NEXT:    fld1
+; CHECK-i386-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; CHECK-i386-NEXT:  LBB2_1: ## %bb_loop
+; CHECK-i386-NEXT:    ## =>This Inner Loop Header: Depth=1
+; CHECK-i386-NEXT:    movl $0, 16(%esp)
+; CHECK-i386-NEXT:    movl %edi, (%esp)
+; CHECK-i386-NEXT:    calll _foo
+; CHECK-i386-NEXT:    movl 16(%esp), %ecx
+; CHECK-i386-NEXT:    testl %ecx, %ecx
+; CHECK-i386-NEXT:    jne LBB2_4
+; CHECK-i386-NEXT:  ## %bb.2: ## %cont
+; CHECK-i386-NEXT:    ## in Loop: Header=BB2_1 Depth=1
+; CHECK-i386-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Reload
+; CHECK-i386-NEXT:    fxch %st(1)
+; CHECK-i386-NEXT:    fucompp
+; CHECK-i386-NEXT:    fnstsw %ax
+; CHECK-i386-NEXT:    ## kill: def $ah killed $ah killed $ax
+; CHECK-i386-NEXT:    sahf
+; CHECK-i386-NEXT:    jbe LBB2_1
+; CHECK-i386-NEXT:  ## %bb.3: ## %bb_end
+; CHECK-i386-NEXT:    movb 8(%ecx), %al
+; CHECK-i386-NEXT:    movb %al, (%esi)
+; CHECK-i386-NEXT:    fldz
+; CHECK-i386-NEXT:  LBB2_4: ## %handler
+; CHECK-i386-NEXT:    fstp %st(0)
+; CHECK-i386-NEXT:    movl %ecx, (%esp)
+; CHECK-i386-NEXT:    calll _free
+; CHECK-i386-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Reload
+; CHECK-i386-NEXT:    addl $20, %esp
+; CHECK-i386-NEXT:    popl %esi
+; CHECK-i386-NEXT:    popl %edi
+; CHECK-i386-NEXT:    retl
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: movb 8(%r12)
-; CHECK-APPLE: movq %r12, %rdi
-; CHECK-APPLE: callq {{.*}}free
 
-; CHECK-O0-LABEL: caller2:
-; CHECK-O0: xorl
-; CHECK-O0: movl %{{.*}}, %r12d
-; CHECK-O0: callq {{.*}}foo
-; CHECK-O0: movq %r12, [[ID:%[a-z]+]]
-; CHECK-O0: cmpq $0, %r12
-; CHECK-O0: jne
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   br label %bb_loop
@@ -112,28 +310,71 @@ handler:
 ; under a certain condition.
 define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
 ; CHECK-APPLE-LABEL: foo_if:
-; CHECK-APPLE: testl %edi, %edi
-; CHECK-APPLE: je
-; CHECK-APPLE: movl $16, %edi
-; CHECK-APPLE: malloc
-; CHECK-APPLE: movb $1, 8(%rax)
-; CHECK-APPLE: movq %rax, %r12
-; CHECK-APPLE-NOT: %r12
-; CHECK-APPLE: ret
-
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    testl %edi, %edi
+; CHECK-APPLE-NEXT:    je LBB3_2
+; CHECK-APPLE-NEXT:  ## %bb.1: ## %gen_error
+; CHECK-APPLE-NEXT:    pushq %rax
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    movl $16, %edi
+; CHECK-APPLE-NEXT:    callq _malloc
+; CHECK-APPLE-NEXT:    movb $1, 8(%rax)
+; CHECK-APPLE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-APPLE-NEXT:    movq %rax, %r12
+; CHECK-APPLE-NEXT:    popq %rax
+; CHECK-APPLE-NEXT:    retq
+; CHECK-APPLE-NEXT:  LBB3_2: ## %normal
+; CHECK-APPLE-NEXT:    xorps %xmm0, %xmm0
+; CHECK-APPLE-NEXT:    retq
+;
 ; CHECK-O0-LABEL: foo_if:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    movq %r12, (%rsp) ## 8-byte Spill
+; CHECK-O0-NEXT:    cmpl $0, %edi
+; CHECK-O0-NEXT:    je LBB3_2
+; CHECK-O0-NEXT:  ## %bb.1: ## %gen_error
+; CHECK-O0-NEXT:    movl $16, %edi
+; CHECK-O0-NEXT:    callq _malloc
+; CHECK-O0-NEXT:    movq %rax, %r12
+; CHECK-O0-NEXT:    movb $1, 8(%rax)
+; CHECK-O0-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+; CHECK-O0-NEXT:  LBB3_2: ## %normal
+; CHECK-O0-NEXT:    movq (%rsp), %r12 ## 8-byte Reload
+; CHECK-O0-NEXT:    xorps %xmm0, %xmm0
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: foo_if:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    pushl %esi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-i386-NEXT:    subl $8, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i386-NEXT:    .cfi_offset %esi, -8
+; CHECK-i386-NEXT:    cmpl $0, 20(%esp)
+; CHECK-i386-NEXT:    je LBB3_2
+; CHECK-i386-NEXT:  ## %bb.1: ## %gen_error
+; CHECK-i386-NEXT:    movl 16(%esp), %esi
+; CHECK-i386-NEXT:    movl $0, 4(%esp)
+; CHECK-i386-NEXT:    movl $16, (%esp)
+; CHECK-i386-NEXT:    calll _malloc
+; CHECK-i386-NEXT:    movl %eax, (%esi)
+; CHECK-i386-NEXT:    movb $1, 8(%eax)
+; CHECK-i386-NEXT:    fld1
+; CHECK-i386-NEXT:    jmp LBB3_3
+; CHECK-i386-NEXT:  LBB3_2: ## %normal
+; CHECK-i386-NEXT:    fldz
+; CHECK-i386-NEXT:  LBB3_3: ## %normal
+; CHECK-i386-NEXT:    addl $8, %esp
+; CHECK-i386-NEXT:    popl %esi
+; CHECK-i386-NEXT:    retl
+
 ; spill to stack
-; CHECK-O0: movq %r12, {{.*}}(%rsp)
-; CHECK-O0: cmpl $0
-; CHECK-O0: je
-; CHECK-O0: movl $16,
-; CHECK-O0: malloc
-; CHECK-O0: movq %rax, %r12
-; CHECK-O0-DAG: movb $1, 8(%rax)
-; CHECK-O0: ret
 ; reload from stack
-; CHECK-O0: movq {{.*}}(%rsp), %r12
-; CHECK-O0: ret
 entry:
   %cond = icmp ne i32 %cc, 0
   br i1 %cond, label %gen_error, label %normal
@@ -154,30 +395,121 @@ normal:
 ; under a certain condition inside a loop.
 define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float %cc2) {
 ; CHECK-APPLE-LABEL: foo_loop:
-; CHECK-APPLE: movq %r12, %rax
-; CHECK-APPLE: testl
-; CHECK-APPLE: je
-; CHECK-APPLE: movl $16, %edi
-; CHECK-APPLE: malloc
-; CHECK-APPLE: movb $1, 8(%rax)
-; CHECK-APPLE: ucomiss
-; CHECK-APPLE: jbe
-; CHECK-APPLE: movq %rax, %r12
-; CHECK-APPLE: ret
-
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %rbx
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    subq $16, %rsp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-APPLE-NEXT:    .cfi_offset %rbx, -16
+; CHECK-APPLE-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-APPLE-NEXT:    movl %edi, %ebx
+; CHECK-APPLE-NEXT:    movq %r12, %rax
+; CHECK-APPLE-NEXT:  LBB4_1: ## %bb_loop
+; CHECK-APPLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; CHECK-APPLE-NEXT:    testl %ebx, %ebx
+; CHECK-APPLE-NEXT:    je LBB4_3
+; CHECK-APPLE-NEXT:  ## %bb.2: ## %gen_error
+; CHECK-APPLE-NEXT:    ## in Loop: Header=BB4_1 Depth=1
+; CHECK-APPLE-NEXT:    movl $16, %edi
+; CHECK-APPLE-NEXT:    callq _malloc
+; CHECK-APPLE-NEXT:    movb $1, 8(%rax)
+; CHECK-APPLE-NEXT:  LBB4_3: ## %bb_cont
+; CHECK-APPLE-NEXT:    ## in Loop: Header=BB4_1 Depth=1
+; CHECK-APPLE-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 4-byte Reload
+; CHECK-APPLE-NEXT:    ## xmm0 = mem[0],zero,zero,zero
+; CHECK-APPLE-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-APPLE-NEXT:    jbe LBB4_1
+; CHECK-APPLE-NEXT:  ## %bb.4: ## %bb_end
+; CHECK-APPLE-NEXT:    xorps %xmm0, %xmm0
+; CHECK-APPLE-NEXT:    movq %rax, %r12
+; CHECK-APPLE-NEXT:    addq $16, %rsp
+; CHECK-APPLE-NEXT:    popq %rbx
+; CHECK-APPLE-NEXT:    retq
+;
 ; CHECK-O0-LABEL: foo_loop:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    subq $40, %rsp
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-O0-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-O0-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-O0-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    jmp LBB4_1
+; CHECK-O0-NEXT:  LBB4_1: ## %bb_loop
+; CHECK-O0-NEXT:    ## =>This Inner Loop Header: Depth=1
+; CHECK-O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx ## 4-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
+; CHECK-O0-NEXT:    cmpl $0, %ecx
+; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    je LBB4_3
+; CHECK-O0-NEXT:  ## %bb.2: ## %gen_error
+; CHECK-O0-NEXT:    ## in Loop: Header=BB4_1 Depth=1
+; CHECK-O0-NEXT:    movl $16, %edi
+; CHECK-O0-NEXT:    callq _malloc
+; CHECK-O0-NEXT:    movq %rax, %rcx
+; CHECK-O0-NEXT:    movb $1, 8(%rcx)
+; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:  LBB4_3: ## %bb_cont
+; CHECK-O0-NEXT:    ## in Loop: Header=BB4_1 Depth=1
+; CHECK-O0-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 4-byte Reload
+; CHECK-O0-NEXT:    ## xmm0 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
+; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT:    ucomiss %xmm1, %xmm0
+; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    jbe LBB4_1
+; CHECK-O0-NEXT:  ## %bb.4: ## %bb_end
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 ## 8-byte Reload
+; CHECK-O0-NEXT:    xorps %xmm0, %xmm0
+; CHECK-O0-NEXT:    addq $40, %rsp
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: foo_loop:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    pushl %edi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-i386-NEXT:    pushl %esi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-i386-NEXT:    subl $20, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-i386-NEXT:    .cfi_offset %esi, -12
+; CHECK-i386-NEXT:    .cfi_offset %edi, -8
+; CHECK-i386-NEXT:    flds 40(%esp)
+; CHECK-i386-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; CHECK-i386-NEXT:    movl 36(%esp), %esi
+; CHECK-i386-NEXT:    movl 32(%esp), %edi
+; CHECK-i386-NEXT:    fld1
+; CHECK-i386-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; CHECK-i386-NEXT:  LBB4_1: ## %bb_loop
+; CHECK-i386-NEXT:    ## =>This Inner Loop Header: Depth=1
+; CHECK-i386-NEXT:    testl %esi, %esi
+; CHECK-i386-NEXT:    je LBB4_3
+; CHECK-i386-NEXT:  ## %bb.2: ## %gen_error
+; CHECK-i386-NEXT:    ## in Loop: Header=BB4_1 Depth=1
+; CHECK-i386-NEXT:    movl $0, 4(%esp)
+; CHECK-i386-NEXT:    movl $16, (%esp)
+; CHECK-i386-NEXT:    calll _malloc
+; CHECK-i386-NEXT:    movl %eax, (%edi)
+; CHECK-i386-NEXT:    movb $1, 8(%eax)
+; CHECK-i386-NEXT:  LBB4_3: ## %bb_cont
+; CHECK-i386-NEXT:    ## in Loop: Header=BB4_1 Depth=1
+; CHECK-i386-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Reload
+; CHECK-i386-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Reload
+; CHECK-i386-NEXT:    fxch %st(1)
+; CHECK-i386-NEXT:    fucompp
+; CHECK-i386-NEXT:    fnstsw %ax
+; CHECK-i386-NEXT:    ## kill: def $ah killed $ah killed $ax
+; CHECK-i386-NEXT:    sahf
+; CHECK-i386-NEXT:    jbe LBB4_1
+; CHECK-i386-NEXT:  ## %bb.4: ## %bb_end
+; CHECK-i386-NEXT:    fldz
+; CHECK-i386-NEXT:    addl $20, %esp
+; CHECK-i386-NEXT:    popl %esi
+; CHECK-i386-NEXT:    popl %edi
+; CHECK-i386-NEXT:    retl
+
 ; spill to stack
-; CHECK-O0: movq %r12, {{.*}}(%rsp)
-; CHECK-O0: cmpl $0
-; CHECK-O0: je
-; CHECK-O0: movl $16,
-; CHECK-O0: malloc
-; CHECK-O0: movq %rax, [[ID:%[a-z0-9]+]]
-; CHECK-O0: movb $1, 8([[ID]])
-; CHECK-O0: jbe
 ; reload from stack
-; CHECK-O0: movq {{.*}}(%rsp), %r12
-; CHECK-O0: ret
 entry:
   br label %bb_loop
 
@@ -206,26 +538,78 @@ bb_end:
 ; parameter.
 define void @foo_sret(%struct.S* sret(%struct.S) %agg.result, i32 %val1, %swift_error** swifterror %error_ptr_ref) {
 ; CHECK-APPLE-LABEL: foo_sret:
-; CHECK-APPLE: movq %rdi, %{{.*}}
-; CHECK-APPLE: movl $16, %edi
-; CHECK-APPLE: malloc
-; CHECK-APPLE: movb $1, 8(%rax)
-; CHECK-APPLE: movl %{{.*}}, 4(%{{.*}})
-; CHECK-APPLE: movq %rax, %r12
-; CHECK-APPLE: movq %{{.*}}, %rax
-; CHECK-APPLE-NOT: x19
-
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %rbp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    pushq %rbx
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-APPLE-NEXT:    pushq %rax
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-APPLE-NEXT:    .cfi_offset %rbx, -24
+; CHECK-APPLE-NEXT:    .cfi_offset %rbp, -16
+; CHECK-APPLE-NEXT:    movl %esi, %ebp
+; CHECK-APPLE-NEXT:    movq %rdi, %rbx
+; CHECK-APPLE-NEXT:    movl $16, %edi
+; CHECK-APPLE-NEXT:    callq _malloc
+; CHECK-APPLE-NEXT:    movb $1, 8(%rax)
+; CHECK-APPLE-NEXT:    movl %ebp, 4(%rbx)
+; CHECK-APPLE-NEXT:    movq %rax, %r12
+; CHECK-APPLE-NEXT:    movq %rbx, %rax
+; CHECK-APPLE-NEXT:    addq $8, %rsp
+; CHECK-APPLE-NEXT:    popq %rbx
+; CHECK-APPLE-NEXT:    popq %rbp
+; CHECK-APPLE-NEXT:    retq
+;
 ; CHECK-O0-LABEL: foo_sret:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    subq $24, %rsp
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-O0-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movl $16, %edi
+; CHECK-O0-NEXT:    callq _malloc
+; CHECK-O0-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %esi ## 4-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
+; CHECK-O0-NEXT:    movq %rax, %rcx
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
+; CHECK-O0-NEXT:    movq %rcx, %r12
+; CHECK-O0-NEXT:    movb $1, 8(%rcx)
+; CHECK-O0-NEXT:    movl %esi, 4(%rdi)
+; CHECK-O0-NEXT:    addq $24, %rsp
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: foo_sret:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    pushl %ebx
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-i386-NEXT:    pushl %edi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-i386-NEXT:    pushl %esi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i386-NEXT:    subl $16, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-i386-NEXT:    .cfi_offset %esi, -16
+; CHECK-i386-NEXT:    .cfi_offset %edi, -12
+; CHECK-i386-NEXT:    .cfi_offset %ebx, -8
+; CHECK-i386-NEXT:    movl 32(%esp), %esi
+; CHECK-i386-NEXT:    movl 36(%esp), %edi
+; CHECK-i386-NEXT:    movl 40(%esp), %ebx
+; CHECK-i386-NEXT:    movl $0, 4(%esp)
+; CHECK-i386-NEXT:    movl $16, (%esp)
+; CHECK-i386-NEXT:    calll _malloc
+; CHECK-i386-NEXT:    movl %eax, (%ebx)
+; CHECK-i386-NEXT:    movb $1, 8(%eax)
+; CHECK-i386-NEXT:    movl %edi, 4(%esi)
+; CHECK-i386-NEXT:    movl %esi, %eax
+; CHECK-i386-NEXT:    addl $16, %esp
+; CHECK-i386-NEXT:    popl %esi
+; CHECK-i386-NEXT:    popl %edi
+; CHECK-i386-NEXT:    popl %ebx
+; CHECK-i386-NEXT:    retl $4
+
 ; spill sret to stack
-; CHECK-O0: movq %rdi,
-; CHECK-O0: movl $16,
-; CHECK-O0: malloc
 ; reload sret from stack
-; CHECK-O0: movq {{.*}}(%rsp), %rax
-; CHECK-O0: movq %{{.*}}, %r12
-; CHECK-O0: movb $1, 8(%rcx)
-; CHECK-O0: movl %{{.*}}, 4(%{{.*}})
-; CHECK-O0: ret
 entry:
   %call = call i8* @malloc(i64 16)
   %call.0 = bitcast i8* %call to %swift_error*
@@ -240,31 +624,95 @@ entry:
 ; "caller3" calls "foo_sret" that takes a swifterror parameter.
 define float @caller3(i8* %error_ref) {
 ; CHECK-APPLE-LABEL: caller3:
-; CHECK-APPLE: movl $1, %esi
-; CHECK-APPLE: xorl %r12d, %r12d
-; CHECK-APPLE: callq {{.*}}foo_sret
-; CHECK-APPLE: testq %r12, %r12
-; CHECK-APPLE: jne
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %r12
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    pushq %rbx
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-APPLE-NEXT:    subq $40, %rsp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-APPLE-NEXT:    .cfi_offset %rbx, -24
+; CHECK-APPLE-NEXT:    .cfi_offset %r12, -16
+; CHECK-APPLE-NEXT:    movq %rdi, %rbx
+; CHECK-APPLE-NEXT:    leaq 8(%rsp), %rdi
+; CHECK-APPLE-NEXT:    movl $1, %esi
+; CHECK-APPLE-NEXT:    xorl %r12d, %r12d
+; CHECK-APPLE-NEXT:    callq _foo_sret
+; CHECK-APPLE-NEXT:    movq %r12, %rdi
+; CHECK-APPLE-NEXT:    testq %r12, %r12
+; CHECK-APPLE-NEXT:    jne LBB6_2
+; CHECK-APPLE-NEXT:  ## %bb.1: ## %cont
+; CHECK-APPLE-NEXT:    movb 8(%rdi), %al
+; CHECK-APPLE-NEXT:    movb %al, (%rbx)
+; CHECK-APPLE-NEXT:  LBB6_2: ## %handler
+; CHECK-APPLE-NEXT:    callq _free
+; CHECK-APPLE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-APPLE-NEXT:    addq $40, %rsp
+; CHECK-APPLE-NEXT:    popq %rbx
+; CHECK-APPLE-NEXT:    popq %r12
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: caller3:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %r12
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    subq $64, %rsp
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-O0-NEXT:    .cfi_offset %r12, -16
+; CHECK-O0-NEXT:    ## implicit-def: $rax
+; CHECK-O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    xorl %eax, %eax
+; CHECK-O0-NEXT:    movl %eax, %r12d
+; CHECK-O0-NEXT:    leaq 40(%rsp), %rdi
+; CHECK-O0-NEXT:    movl $1, %esi
+; CHECK-O0-NEXT:    callq _foo_sret
+; CHECK-O0-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %r12, %rax
+; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    cmpq $0, %r12
+; CHECK-O0-NEXT:    jne LBB6_2
+; CHECK-O0-NEXT:  ## %bb.1: ## %cont
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
+; CHECK-O0-NEXT:    movb 8(%rcx), %cl
+; CHECK-O0-NEXT:    movb %cl, (%rax)
+; CHECK-O0-NEXT:  LBB6_2: ## %handler
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
+; CHECK-O0-NEXT:    callq _free
+; CHECK-O0-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT:    addq $64, %rsp
+; CHECK-O0-NEXT:    popq %r12
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: caller3:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    subl $44, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-i386-NEXT:    movl $0, 12(%esp)
+; CHECK-i386-NEXT:    leal 12(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 8(%esp)
+; CHECK-i386-NEXT:    leal 16(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, (%esp)
+; CHECK-i386-NEXT:    movl $1, 4(%esp)
+; CHECK-i386-NEXT:    calll _foo_sret
+; CHECK-i386-NEXT:    subl $4, %esp
+; CHECK-i386-NEXT:    movl 12(%esp), %eax
+; CHECK-i386-NEXT:    testl %eax, %eax
+; CHECK-i386-NEXT:    jne LBB6_2
+; CHECK-i386-NEXT:  ## %bb.1: ## %cont
+; CHECK-i386-NEXT:    movl 48(%esp), %ecx
+; CHECK-i386-NEXT:    movb 8(%eax), %dl
+; CHECK-i386-NEXT:    movb %dl, (%ecx)
+; CHECK-i386-NEXT:  LBB6_2: ## %handler
+; CHECK-i386-NEXT:    movl %eax, (%esp)
+; CHECK-i386-NEXT:    calll _free
+; CHECK-i386-NEXT:    fld1
+; CHECK-i386-NEXT:    addl $44, %esp
+; CHECK-i386-NEXT:    retl
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: movb 8(%rdi),
-; CHECK-APPLE: movb %{{.*}},
-; CHECK-APPLE: callq {{.*}}free
 
-; CHECK-O0-LABEL: caller3:
-; CHECK-O0: xorl
-; CHECK-O0: movl {{.*}}, %r12d
-; CHECK-O0: leaq {{.*}}, %rdi
-; CHECK-O0: movl $1, %esi
-; CHECK-O0: callq {{.*}}foo_sret
-; CHECK-O0: movq %r12,
-; CHECK-O0: cmpq $0
-; CHECK-O0: jne
 ; Access part of the error object and save it to error_ref
-; CHECK-O0: movb 8(%{{.*}}),
-; CHECK-O0: movb %{{.*}},
 ; reload from stack
-; CHECK-O0: movq {{.*}}(%rsp), %rdi
-; CHECK-O0: callq {{.*}}free
 entry:
   %s = alloca %struct.S, align 8
   %error_ptr_ref = alloca swifterror %swift_error*
@@ -288,38 +736,173 @@ handler:
 ; time with a 
diff erent swifterror value, from "alloca swifterror".
 define float @caller_with_multiple_swifterror_values(i8* %error_ref, i8* %error_ref2) {
 ; CHECK-APPLE-LABEL: caller_with_multiple_swifterror_values:
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %rbp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    .cfi_offset %rbp, -16
+; CHECK-APPLE-NEXT:    movq %rsp, %rbp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-APPLE-NEXT:    pushq %r14
+; CHECK-APPLE-NEXT:    pushq %r12
+; CHECK-APPLE-NEXT:    pushq %rbx
+; CHECK-APPLE-NEXT:    pushq %rax
+; CHECK-APPLE-NEXT:    .cfi_offset %rbx, -40
+; CHECK-APPLE-NEXT:    .cfi_offset %r12, -32
+; CHECK-APPLE-NEXT:    .cfi_offset %r14, -24
+; CHECK-APPLE-NEXT:    movq %rsi, %r14
+; CHECK-APPLE-NEXT:    movq %rdi, %rbx
+; CHECK-APPLE-NEXT:    xorl %r12d, %r12d
+; CHECK-APPLE-NEXT:    callq _foo
+; CHECK-APPLE-NEXT:    movq %r12, %rdi
+; CHECK-APPLE-NEXT:    testq %r12, %r12
+; CHECK-APPLE-NEXT:    jne LBB7_2
+; CHECK-APPLE-NEXT:  ## %bb.1: ## %cont
+; CHECK-APPLE-NEXT:    movb 8(%rdi), %al
+; CHECK-APPLE-NEXT:    movb %al, (%rbx)
+; CHECK-APPLE-NEXT:  LBB7_2: ## %handler
+; CHECK-APPLE-NEXT:    callq _free
+; CHECK-APPLE-NEXT:    movq %rsp, %rax
+; CHECK-APPLE-NEXT:    addq $-16, %rax
+; CHECK-APPLE-NEXT:    movq %rax, %rsp
+; CHECK-APPLE-NEXT:    xorl %r12d, %r12d
+; CHECK-APPLE-NEXT:    callq _foo
+; CHECK-APPLE-NEXT:    movq %r12, %rdi
+; CHECK-APPLE-NEXT:    testq %r12, %r12
+; CHECK-APPLE-NEXT:    jne LBB7_4
+; CHECK-APPLE-NEXT:  ## %bb.3: ## %cont2
+; CHECK-APPLE-NEXT:    movb 8(%rdi), %al
+; CHECK-APPLE-NEXT:    movb %al, (%r14)
+; CHECK-APPLE-NEXT:  LBB7_4: ## %handler2
+; CHECK-APPLE-NEXT:    callq _free
+; CHECK-APPLE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-APPLE-NEXT:    leaq -24(%rbp), %rsp
+; CHECK-APPLE-NEXT:    popq %rbx
+; CHECK-APPLE-NEXT:    popq %r12
+; CHECK-APPLE-NEXT:    popq %r14
+; CHECK-APPLE-NEXT:    popq %rbp
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: caller_with_multiple_swifterror_values:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %rbp
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    .cfi_offset %rbp, -16
+; CHECK-O0-NEXT:    movq %rsp, %rbp
+; CHECK-O0-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-O0-NEXT:    pushq %r12
+; CHECK-O0-NEXT:    subq $56, %rsp
+; CHECK-O0-NEXT:    .cfi_offset %r12, -24
+; CHECK-O0-NEXT:    ## implicit-def: $rax
+; CHECK-O0-NEXT:    ## implicit-def: $rax
+; CHECK-O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    xorl %eax, %eax
+; CHECK-O0-NEXT:    movl %eax, %r12d
+; CHECK-O0-NEXT:    callq _foo
+; CHECK-O0-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %r12, %rax
+; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    cmpq $0, %r12
+; CHECK-O0-NEXT:    jne LBB7_2
+; CHECK-O0-NEXT:  ## %bb.1: ## %cont
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
+; CHECK-O0-NEXT:    movb 8(%rcx), %cl
+; CHECK-O0-NEXT:    movb %cl, (%rax)
+; CHECK-O0-NEXT:  LBB7_2: ## %handler
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
+; CHECK-O0-NEXT:    callq _free
+; CHECK-O0-NEXT:    movq %rsp, %rax
+; CHECK-O0-NEXT:    addq $-16, %rax
+; CHECK-O0-NEXT:    movq %rax, %rsp
+; CHECK-O0-NEXT:    xorl %eax, %eax
+; CHECK-O0-NEXT:    movl %eax, %r12d
+; CHECK-O0-NEXT:    callq _foo
+; CHECK-O0-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %r12, %rax
+; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    cmpq $0, %r12
+; CHECK-O0-NEXT:    jne LBB7_4
+; CHECK-O0-NEXT:  ## %bb.3: ## %cont2
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
+; CHECK-O0-NEXT:    movb 8(%rcx), %cl
+; CHECK-O0-NEXT:    movb %cl, (%rax)
+; CHECK-O0-NEXT:  LBB7_4: ## %handler2
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
+; CHECK-O0-NEXT:    callq _free
+; CHECK-O0-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT:    leaq -8(%rbp), %rsp
+; CHECK-O0-NEXT:    popq %r12
+; CHECK-O0-NEXT:    popq %rbp
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: caller_with_multiple_swifterror_values:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    pushl %ebp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-i386-NEXT:    .cfi_offset %ebp, -8
+; CHECK-i386-NEXT:    movl %esp, %ebp
+; CHECK-i386-NEXT:    .cfi_def_cfa_register %ebp
+; CHECK-i386-NEXT:    pushl %esi
+; CHECK-i386-NEXT:    pushl %eax
+; CHECK-i386-NEXT:    .cfi_offset %esi, -12
+; CHECK-i386-NEXT:    movl $0, -8(%ebp)
+; CHECK-i386-NEXT:    subl $12, %esp
+; CHECK-i386-NEXT:    leal -8(%ebp), %eax
+; CHECK-i386-NEXT:    pushl %eax
+; CHECK-i386-NEXT:    calll _foo
+; CHECK-i386-NEXT:    fstp %st(0)
+; CHECK-i386-NEXT:    addl $16, %esp
+; CHECK-i386-NEXT:    movl -8(%ebp), %eax
+; CHECK-i386-NEXT:    testl %eax, %eax
+; CHECK-i386-NEXT:    jne LBB7_2
+; CHECK-i386-NEXT:  ## %bb.1: ## %cont
+; CHECK-i386-NEXT:    movl 8(%ebp), %ecx
+; CHECK-i386-NEXT:    movb 8(%eax), %dl
+; CHECK-i386-NEXT:    movb %dl, (%ecx)
+; CHECK-i386-NEXT:  LBB7_2: ## %handler
+; CHECK-i386-NEXT:    subl $12, %esp
+; CHECK-i386-NEXT:    pushl %eax
+; CHECK-i386-NEXT:    calll _free
+; CHECK-i386-NEXT:    addl $16, %esp
+; CHECK-i386-NEXT:    movl %esp, %esi
+; CHECK-i386-NEXT:    leal -16(%esi), %eax
+; CHECK-i386-NEXT:    movl %eax, %esp
+; CHECK-i386-NEXT:    movl $0, -16(%esi)
+; CHECK-i386-NEXT:    subl $12, %esp
+; CHECK-i386-NEXT:    pushl %eax
+; CHECK-i386-NEXT:    calll _foo
+; CHECK-i386-NEXT:    fstp %st(0)
+; CHECK-i386-NEXT:    addl $16, %esp
+; CHECK-i386-NEXT:    movl -16(%esi), %eax
+; CHECK-i386-NEXT:    testl %eax, %eax
+; CHECK-i386-NEXT:    jne LBB7_4
+; CHECK-i386-NEXT:  ## %bb.3: ## %cont2
+; CHECK-i386-NEXT:    movl 12(%ebp), %ecx
+; CHECK-i386-NEXT:    movb 8(%eax), %dl
+; CHECK-i386-NEXT:    movb %dl, (%ecx)
+; CHECK-i386-NEXT:  LBB7_4: ## %handler2
+; CHECK-i386-NEXT:    subl $12, %esp
+; CHECK-i386-NEXT:    pushl %eax
+; CHECK-i386-NEXT:    calll _free
+; CHECK-i386-NEXT:    addl $16, %esp
+; CHECK-i386-NEXT:    fld1
+; CHECK-i386-NEXT:    leal -4(%ebp), %esp
+; CHECK-i386-NEXT:    popl %esi
+; CHECK-i386-NEXT:    popl %ebp
+; CHECK-i386-NEXT:    retl
 
 ; The first swifterror value:
-; CHECK-APPLE: xorl %r12d, %r12d
-; CHECK-APPLE: callq {{.*}}foo
-; CHECK-APPLE: testq %r12, %r12
-; CHECK-APPLE: jne
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: movb 8(%rdi)
-; CHECK-APPLE: callq {{.*}}free
 
 ; The second swifterror value:
-; CHECK-APPLE: xorl %r12d, %r12d
-; CHECK-APPLE: callq {{.*}}foo
-; CHECK-APPLE: testq %r12, %r12
-; CHECK-APPLE: jne
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: movb 8(%rdi)
-; CHECK-APPLE: callq {{.*}}free
 
-; CHECK-O0-LABEL: caller_with_multiple_swifterror_values:
 
 ; The first swifterror value:
-; CHECK-O0: xorl
-; CHECK-O0: movl %{{.*}}, %r12d
-; CHECK-O0: callq {{.*}}foo
-; CHECK-O0: jne
 
 ; The second swifterror value:
-; CHECK-O0: xorl
-; CHECK-O0: movl %{{.*}}, %r12d
-; CHECK-O0: callq {{.*}}foo
-; CHECK-O0: jne
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr_ref
@@ -361,6 +944,98 @@ handler2:
 ; CHECK-APPLE: _swifterror_isel
 ; CHECK-O0: _swifterror_isel
 define void @swifterror_isel(%swift.refcounted*) {
+; CHECK-APPLE-LABEL: swifterror_isel:
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %r13
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    pushq %r12
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-APPLE-NEXT:    pushq %rax
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-APPLE-NEXT:    .cfi_offset %r12, -24
+; CHECK-APPLE-NEXT:    .cfi_offset %r13, -16
+; CHECK-APPLE-NEXT:    xorl %eax, %eax
+; CHECK-APPLE-NEXT:    testb %al, %al
+; CHECK-APPLE-NEXT:    jne LBB8_3
+; CHECK-APPLE-NEXT:  ## %bb.1: ## %.preheader
+; CHECK-APPLE-NEXT:    movq %rdi, %r13
+; CHECK-APPLE-NEXT:    ## implicit-def: $di
+; CHECK-APPLE-NEXT:    ## implicit-def: $r12
+; CHECK-APPLE-NEXT:  LBB8_2: ## =>This Inner Loop Header: Depth=1
+; CHECK-APPLE-NEXT:    callq *%rax
+; CHECK-APPLE-NEXT:    movzwl (%rax), %edi
+; CHECK-APPLE-NEXT:    jmp LBB8_2
+; CHECK-APPLE-NEXT:  LBB8_3:
+; CHECK-APPLE-NEXT:    addq $8, %rsp
+; CHECK-APPLE-NEXT:    popq %r12
+; CHECK-APPLE-NEXT:    popq %r13
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: swifterror_isel:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %r13
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    pushq %r12
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-O0-NEXT:    subq $40, %rsp
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-O0-NEXT:    .cfi_offset %r12, -24
+; CHECK-O0-NEXT:    .cfi_offset %r13, -16
+; CHECK-O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    ## implicit-def: $al
+; CHECK-O0-NEXT:    testb $1, %al
+; CHECK-O0-NEXT:    ## implicit-def: $ax
+; CHECK-O0-NEXT:    ## implicit-def: $r12
+; CHECK-O0-NEXT:    jne LBB8_2
+; CHECK-O0-NEXT:  LBB8_1: ## =>This Inner Loop Header: Depth=1
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 ## 8-byte Reload
+; CHECK-O0-NEXT:    movw {{[-0-9]+}}(%r{{[sb]}}p), %ax ## 2-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 ## 8-byte Reload
+; CHECK-O0-NEXT:    ## implicit-def: $edi
+; CHECK-O0-NEXT:    movw %ax, %di
+; CHECK-O0-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
+; CHECK-O0-NEXT:    ## implicit-def: $rax
+; CHECK-O0-NEXT:    callq *%rax
+; CHECK-O0-NEXT:    ## implicit-def: $rax
+; CHECK-O0-NEXT:    movw (%rax), %ax
+; CHECK-O0-NEXT:    movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; CHECK-O0-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    jmp LBB8_1
+; CHECK-O0-NEXT:  LBB8_2:
+; CHECK-O0-NEXT:    addq $40, %rsp
+; CHECK-O0-NEXT:    popq %r12
+; CHECK-O0-NEXT:    popq %r13
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: swifterror_isel:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    pushl %edi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-i386-NEXT:    pushl %esi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-i386-NEXT:    subl $20, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-i386-NEXT:    .cfi_offset %esi, -12
+; CHECK-i386-NEXT:    .cfi_offset %edi, -8
+; CHECK-i386-NEXT:    xorl %eax, %eax
+; CHECK-i386-NEXT:    testb %al, %al
+; CHECK-i386-NEXT:    jne LBB8_3
+; CHECK-i386-NEXT:  ## %bb.1: ## %.preheader
+; CHECK-i386-NEXT:    movl 32(%esp), %esi
+; CHECK-i386-NEXT:    leal 16(%esp), %edi
+; CHECK-i386-NEXT:    ## implicit-def: $ax
+; CHECK-i386-NEXT:  LBB8_2: ## =>This Inner Loop Header: Depth=1
+; CHECK-i386-NEXT:    movl %edi, 8(%esp)
+; CHECK-i386-NEXT:    movl %esi, 4(%esp)
+; CHECK-i386-NEXT:    movl %eax, (%esp)
+; CHECK-i386-NEXT:    calll *%eax
+; CHECK-i386-NEXT:    movzwl (%eax), %eax
+; CHECK-i386-NEXT:    jmp LBB8_2
+; CHECK-i386-NEXT:  LBB8_3:
+; CHECK-i386-NEXT:    addl $20, %esp
+; CHECK-i386-NEXT:    popl %esi
+; CHECK-i386-NEXT:    popl %edi
+; CHECK-i386-NEXT:    retl
 entry:
   %swifterror = alloca swifterror %swift_error*, align 8
   br i1 undef, label %5, label %1
@@ -376,16 +1051,47 @@ entry:
 ; This tests the basic usage of a swifterror parameter with swiftcc.
 define swiftcc float @foo_swiftcc(%swift_error** swifterror %error_ptr_ref) {
 ; CHECK-APPLE-LABEL: foo_swiftcc:
-; CHECK-APPLE: movl $16, %edi
-; CHECK-APPLE: malloc
-; CHECK-APPLE: movb $1, 8(%rax)
-; CHECK-APPLE: movq %rax, %r12
-
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %rax
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    movl $16, %edi
+; CHECK-APPLE-NEXT:    callq _malloc
+; CHECK-APPLE-NEXT:    movb $1, 8(%rax)
+; CHECK-APPLE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-APPLE-NEXT:    movq %rax, %r12
+; CHECK-APPLE-NEXT:    popq %rax
+; CHECK-APPLE-NEXT:    retq
+;
 ; CHECK-O0-LABEL: foo_swiftcc:
-; CHECK-O0: movl $16
-; CHECK-O0: malloc
-; CHECK-O0: movq %{{.*}}, %r12
-; CHECK-O0: movb $1, 8(%rax)
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    movl $16, %edi
+; CHECK-O0-NEXT:    callq _malloc
+; CHECK-O0-NEXT:    movq %rax, %r12
+; CHECK-O0-NEXT:    movb $1, 8(%rax)
+; CHECK-O0-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: foo_swiftcc:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    pushl %esi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-i386-NEXT:    subl $8, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i386-NEXT:    .cfi_offset %esi, -8
+; CHECK-i386-NEXT:    movl 16(%esp), %esi
+; CHECK-i386-NEXT:    movl $0, 4(%esp)
+; CHECK-i386-NEXT:    movl $16, (%esp)
+; CHECK-i386-NEXT:    calll _malloc
+; CHECK-i386-NEXT:    movl %eax, (%esi)
+; CHECK-i386-NEXT:    movb $1, 8(%eax)
+; CHECK-i386-NEXT:    fld1
+; CHECK-i386-NEXT:    addl $8, %esp
+; CHECK-i386-NEXT:    popl %esi
+; CHECK-i386-NEXT:    retl
+
 
 entry:
   %call = call i8* @malloc(i64 16)
@@ -401,16 +1107,31 @@ declare swiftcc float @moo(%swift_error** swifterror)
 ; Test parameter forwarding.
 define swiftcc float @forward_swifterror(%swift_error** swifterror %error_ptr_ref) {
 ; CHECK-APPLE-LABEL: forward_swifterror:
-; CHECK-APPLE: pushq %rax
-; CHECK-APPLE: callq _moo
-; CHECK-APPLE: popq %rax
-; CHECK-APPLE: retq
-
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %rax
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    callq _moo
+; CHECK-APPLE-NEXT:    popq %rax
+; CHECK-APPLE-NEXT:    retq
+;
 ; CHECK-O0-LABEL: forward_swifterror:
-; CHECK-O0: pushq %rax
-; CHECK-O0:  callq _moo
-; CHECK-O0: popq %rax
-; CHECK-O0:  retq
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    callq _moo
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: forward_swifterror:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    subl $12, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i386-NEXT:    movl 16(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, (%esp)
+; CHECK-i386-NEXT:    calll _moo
+; CHECK-i386-NEXT:    addl $12, %esp
+; CHECK-i386-NEXT:    retl
+
 
 entry:
   %call = call swiftcc float @moo(%swift_error** swifterror %error_ptr_ref)
@@ -419,32 +1140,58 @@ entry:
 
 define swiftcc float @conditionally_forward_swifterror(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
 ; CHECK-APPLE-LABEL: conditionally_forward_swifterror:
-; CHECK-APPLE:	testl %edi, %edi
-; CHECK-APPLE:  je
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    testl %edi, %edi
+; CHECK-APPLE-NEXT:    je LBB11_2
+; CHECK-APPLE-NEXT:  ## %bb.1: ## %gen_error
+; CHECK-APPLE-NEXT:    pushq %rax
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    callq _moo
+; CHECK-APPLE-NEXT:    popq %rax
+; CHECK-APPLE-NEXT:    retq
+; CHECK-APPLE-NEXT:  LBB11_2: ## %normal
+; CHECK-APPLE-NEXT:    xorps %xmm0, %xmm0
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: conditionally_forward_swifterror:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    movq %r12, (%rsp) ## 8-byte Spill
+; CHECK-O0-NEXT:    cmpl $0, %edi
+; CHECK-O0-NEXT:    je LBB11_2
+; CHECK-O0-NEXT:  ## %bb.1: ## %gen_error
+; CHECK-O0-NEXT:    movq (%rsp), %r12 ## 8-byte Reload
+; CHECK-O0-NEXT:    callq _moo
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+; CHECK-O0-NEXT:  LBB11_2: ## %normal
+; CHECK-O0-NEXT:    movq (%rsp), %r12 ## 8-byte Reload
+; CHECK-O0-NEXT:    xorps %xmm0, %xmm0
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: conditionally_forward_swifterror:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    subl $12, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i386-NEXT:    cmpl $0, 20(%esp)
+; CHECK-i386-NEXT:    je LBB11_2
+; CHECK-i386-NEXT:  ## %bb.1: ## %gen_error
+; CHECK-i386-NEXT:    movl 16(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, (%esp)
+; CHECK-i386-NEXT:    calll _moo
+; CHECK-i386-NEXT:    addl $12, %esp
+; CHECK-i386-NEXT:    retl
+; CHECK-i386-NEXT:  LBB11_2: ## %normal
+; CHECK-i386-NEXT:    fldz
+; CHECK-i386-NEXT:    addl $12, %esp
+; CHECK-i386-NEXT:    retl
+
+
 
-; CHECK-APPLE:  pushq %rax
-; CHECK-APPLE:  callq _moo
-; CHECK-APPLE:  popq %rax
-; CHECK-APPLE:  retq
 
-; CHECK-APPLE:  xorps %xmm0, %xmm0
-; CHECK-APPLE:  retq
 
-; CHECK-O0-LABEL: conditionally_forward_swifterror:
-; CHECK-O0: pushq [[REG1:%[a-z0-9]+]]
-; CHECK-O0-DAG:  movq %r12, (%rsp)
-; CHECK-O0:  cmpl $0, %edi
-; CHECK-O0:  je
-
-; CHECK-O0:  movq (%rsp), %r12
-; CHECK-O0:  callq _moo
-; CHECK-O0:  popq [[REG1]]
-; CHECK-O0:  retq
-
-; CHECK-O0:  movq (%rsp), %r12
-; CHECK-O0:  xorps %xmm0, %xmm0
-; CHECK-O0:  popq [[REG1]]
-; CHECK-O0:  retq
 entry:
   %cond = icmp ne i32 %cc, 0
   br i1 %cond, label %gen_error, label %normal
@@ -459,44 +1206,122 @@ normal:
 
 ; Check that we don't blow up on tail calling swifterror argument functions.
 define float @tailcallswifterror(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-APPLE-LABEL: tailcallswifterror:
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %rax
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    callq _tailcallswifterror
+; CHECK-APPLE-NEXT:    popq %rax
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: tailcallswifterror:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    callq _tailcallswifterror
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: tailcallswifterror:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    jmp _tailcallswifterror ## TAILCALL
 entry:
   %0 = tail call float @tailcallswifterror(%swift_error** swifterror %error_ptr_ref)
   ret float %0
 }
 define swiftcc float @tailcallswifterror_swiftcc(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-APPLE-LABEL: tailcallswifterror_swiftcc:
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %rax
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    callq _tailcallswifterror_swiftcc
+; CHECK-APPLE-NEXT:    popq %rax
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: tailcallswifterror_swiftcc:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    callq _tailcallswifterror_swiftcc
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: tailcallswifterror_swiftcc:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    jmp _tailcallswifterror_swiftcc ## TAILCALL
 entry:
   %0 = tail call swiftcc float @tailcallswifterror_swiftcc(%swift_error** swifterror %error_ptr_ref)
   ret float %0
 }
 
 ; Check that we can handle an empty function with swifterror argument.
-; CHECK-i386-LABEL: empty_swiftcc:
-; CHECK-i386:  movl    4(%esp), %eax
-; CHECK-i386:  movl    8(%esp), %edx
-; CHECK-i386:  movl    12(%esp), %ecx
-; CHECK-i386:  retl
-; CHECK-APPLE-LABEL: empty_swiftcc:
-; CHECK-APPLE:  movl    %edx, %ecx
-; CHECK-APPLE-DAG:  movl    %edi, %eax
-; CHECK-APPLE-DAG:  movl    %esi, %edx
-; CHECK-APPLE:  retq
 define swiftcc {i32, i32, i32} @empty_swiftcc({i32, i32, i32} , %swift_error** swifterror %error_ptr_ref) {
+; CHECK-APPLE-LABEL: empty_swiftcc:
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    movl %edx, %ecx
+; CHECK-APPLE-NEXT:    movl %esi, %edx
+; CHECK-APPLE-NEXT:    movl %edi, %eax
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: empty_swiftcc:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    movl %edx, %ecx
+; CHECK-O0-NEXT:    movl %esi, %edx
+; CHECK-O0-NEXT:    movl %edi, %eax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: empty_swiftcc:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    movl 4(%esp), %eax
+; CHECK-i386-NEXT:    movl 8(%esp), %edx
+; CHECK-i386-NEXT:    movl 12(%esp), %ecx
+; CHECK-i386-NEXT:    retl
 entry:
   ret {i32, i32, i32} %0
 }
 
 ; Make sure we can handle the case when isel generates new machine basic blocks.
-; CHECK-APPLE-LABEL: dont_crash_on_new_isel_blocks:
-; CHECK-APPLE: xorl    %eax, %eax
-; CHECK-APPLE: testb   %al, %al
-; CHECK-APPLE: jne
-; CHECK-APPLE:         pushq   %rax
-; CHECK-APPLE-NEXT:  .cfi_def_cfa_offset 16
-; CHECK-APPLE-NEXT:    callq   *%rax
-; CHECK-APPLE-NEXT:    popq    %rax
-; CHECK-APPLE-NEXT:    ret
-
 define swiftcc void @dont_crash_on_new_isel_blocks(%swift_error** nocapture swifterror, i1, i8**) {
+; CHECK-APPLE-LABEL: dont_crash_on_new_isel_blocks:
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    xorl %eax, %eax
+; CHECK-APPLE-NEXT:    testb %al, %al
+; CHECK-APPLE-NEXT:    jne LBB15_2
+; CHECK-APPLE-NEXT:  ## %bb.1: ## %entry
+; CHECK-APPLE-NEXT:    testb $1, %dil
+; CHECK-APPLE-NEXT:  LBB15_2: ## %cont
+; CHECK-APPLE-NEXT:    pushq %rax
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    callq *%rax
+; CHECK-APPLE-NEXT:    popq %rax
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: dont_crash_on_new_isel_blocks:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    movq %r12, (%rsp) ## 8-byte Spill
+; CHECK-O0-NEXT:    movb %dil, %al
+; CHECK-O0-NEXT:    orb $0, %al
+; CHECK-O0-NEXT:    testb $1, %al
+; CHECK-O0-NEXT:    jne LBB15_2
+; CHECK-O0-NEXT:  ## %bb.1: ## %falsebb
+; CHECK-O0-NEXT:  LBB15_2: ## %cont
+; CHECK-O0-NEXT:    movq (%rsp), %r12 ## 8-byte Reload
+; CHECK-O0-NEXT:    ## implicit-def: $rax
+; CHECK-O0-NEXT:    callq *%rax
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: dont_crash_on_new_isel_blocks:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    xorl %eax, %eax
+; CHECK-i386-NEXT:    testb %al, %al
+; CHECK-i386-NEXT:    jne LBB15_2
+; CHECK-i386-NEXT:  ## %bb.1: ## %entry
+; CHECK-i386-NEXT:    testb $1, 8(%esp)
+; CHECK-i386-NEXT:  LBB15_2: ## %cont
+; CHECK-i386-NEXT:    jmpl *%eax ## TAILCALL
 entry:
   %3 = or i1 false, %1
   br i1 %3, label %cont, label %falsebb
@@ -510,69 +1335,230 @@ cont:
   ret void
 }
 
-; CHECK-APPLE-LABEL: swifterror_clobber
-; CHECK-APPLE: movq %r12, [[REG:%.*]]
-; CHECK-APPLE: nop
-; CHECK-APPLE: movq [[REG]], %r12
 define swiftcc void @swifterror_clobber(%swift_error** nocapture swifterror %err) {
+; CHECK-APPLE-LABEL: swifterror_clobber:
+; CHECK-APPLE:       ## %bb.0:
+; CHECK-APPLE-NEXT:    movq %r12, %rax
+; CHECK-APPLE-NEXT:    ## InlineAsm Start
+; CHECK-APPLE-NEXT:    nop
+; CHECK-APPLE-NEXT:    ## InlineAsm End
+; CHECK-APPLE-NEXT:    movq %rax, %r12
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: swifterror_clobber:
+; CHECK-O0:       ## %bb.0:
+; CHECK-O0-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    ## InlineAsm Start
+; CHECK-O0-NEXT:    nop
+; CHECK-O0-NEXT:    ## InlineAsm End
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 ## 8-byte Reload
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: swifterror_clobber:
+; CHECK-i386:       ## %bb.0:
+; CHECK-i386-NEXT:    ## InlineAsm Start
+; CHECK-i386-NEXT:    nop
+; CHECK-i386-NEXT:    ## InlineAsm End
+; CHECK-i386-NEXT:    retl
   call void asm sideeffect "nop", "~{r12}"()
   ret void
 }
 
-; CHECK-APPLE-LABEL: swifterror_reg_clobber
-; CHECK-APPLE: pushq %r12
-; CHECK-APPLE: nop
-; CHECK-APPLE: popq  %r12
 define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) {
+; CHECK-APPLE-LABEL: swifterror_reg_clobber:
+; CHECK-APPLE:       ## %bb.0:
+; CHECK-APPLE-NEXT:    pushq %r12
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    .cfi_offset %r12, -16
+; CHECK-APPLE-NEXT:    ## InlineAsm Start
+; CHECK-APPLE-NEXT:    nop
+; CHECK-APPLE-NEXT:    ## InlineAsm End
+; CHECK-APPLE-NEXT:    popq %r12
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: swifterror_reg_clobber:
+; CHECK-O0:       ## %bb.0:
+; CHECK-O0-NEXT:    pushq %r12
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    .cfi_offset %r12, -16
+; CHECK-O0-NEXT:    ## InlineAsm Start
+; CHECK-O0-NEXT:    nop
+; CHECK-O0-NEXT:    ## InlineAsm End
+; CHECK-O0-NEXT:    popq %r12
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: swifterror_reg_clobber:
+; CHECK-i386:       ## %bb.0:
+; CHECK-i386-NEXT:    ## InlineAsm Start
+; CHECK-i386-NEXT:    nop
+; CHECK-i386-NEXT:    ## InlineAsm End
+; CHECK-i386-NEXT:    retl
   call void asm sideeffect "nop", "~{r12}"()
   ret void
 }
 
-; CHECK-APPLE-LABEL: params_in_reg
-; Save callee save registers to store clobbered arguments.
-; CHECK-APPLE:  pushq   %rbp
-; CHECK-APPLE:  pushq   %r15
-; CHECK-APPLE:  pushq   %r14
-; Clobbered swiftself register.
-; CHECK-APPLE:  pushq   %r13
-; CHECK-APPLE:  pushq   %rbx
-; CHECK-APPLE:  subq    $48, %rsp
-; Save arguments.
-; CHECK-APPLE:  movq    %r12, 32(%rsp)
-; CHECK-APPLE:  movq    %r13, 24(%rsp)
-; CHECK-APPLE:  movq    %r9, 16(%rsp)
-; CHECK-APPLE:  movq    %r8, 8(%rsp)
-; CHECK-APPLE:  movq    %rcx, %r14
-; CHECK-APPLE:  movq    %rdx, %r15
-; CHECK-APPLE:  movq    %rsi, %rbx
-; CHECK-APPLE:  movq    %rdi, %rbp
-; Setup call.
-; CHECK-APPLE:  movl    $1, %edi
-; CHECK-APPLE:  movl    $2, %esi
-; CHECK-APPLE:  movl    $3, %edx
-; CHECK-APPLE:  movl    $4, %ecx
-; CHECK-APPLE:  movl    $5, %r8d
-; CHECK-APPLE:  movl    $6, %r9d
-; CHECK-APPLE:  xorl    %r13d, %r13d
-; CHECK-APPLE:  xorl    %r12d, %r12d
-; CHECK-APPLE:  callq   _params_in_reg2
-; Setup second call with stored arguments.
-; CHECK-APPLE:  movq    %rbp, %rdi
-; CHECK-APPLE:  movq    %rbx, %rsi
-; CHECK-APPLE:  movq    %r15, %rdx
-; CHECK-APPLE:  movq    %r14, %rcx
-; CHECK-APPLE:  movq    8(%rsp), %r8
-; CHECK-APPLE:  movq    16(%rsp), %r9
-; CHECK-APPLE:  movq    24(%rsp), %r13
-; CHECK-APPLE:  movq    32(%rsp), %r12
-; CHECK-APPLE:  callq   _params_in_reg2
-; CHECK-APPLE:  addq    $48, %rsp
-; CHECK-APPLE:  popq    %rbx
-; CHECK-APPLE:  popq    %r13
-; CHECK-APPLE:  popq    %r14
-; CHECK-APPLE:  popq    %r15
-; CHECK-APPLE:  popq    %rbp
 define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err) {
+; CHECK-APPLE-LABEL: params_in_reg:
+; CHECK-APPLE:       ## %bb.0:
+; CHECK-APPLE-NEXT:    pushq %rbp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    pushq %r15
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-APPLE-NEXT:    pushq %r14
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-APPLE-NEXT:    pushq %r13
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-APPLE-NEXT:    pushq %rbx
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-APPLE-NEXT:    subq $48, %rsp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-APPLE-NEXT:    .cfi_offset %rbx, -48
+; CHECK-APPLE-NEXT:    .cfi_offset %r13, -40
+; CHECK-APPLE-NEXT:    .cfi_offset %r14, -32
+; CHECK-APPLE-NEXT:    .cfi_offset %r15, -24
+; CHECK-APPLE-NEXT:    .cfi_offset %rbp, -16
+; CHECK-APPLE-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-APPLE-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-APPLE-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-APPLE-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-APPLE-NEXT:    movq %rcx, %r14
+; CHECK-APPLE-NEXT:    movq %rdx, %r15
+; CHECK-APPLE-NEXT:    movq %rsi, %rbx
+; CHECK-APPLE-NEXT:    movq %rdi, %rbp
+; CHECK-APPLE-NEXT:    movl $1, %edi
+; CHECK-APPLE-NEXT:    movl $2, %esi
+; CHECK-APPLE-NEXT:    movl $3, %edx
+; CHECK-APPLE-NEXT:    movl $4, %ecx
+; CHECK-APPLE-NEXT:    movl $5, %r8d
+; CHECK-APPLE-NEXT:    movl $6, %r9d
+; CHECK-APPLE-NEXT:    xorl %r13d, %r13d
+; CHECK-APPLE-NEXT:    xorl %r12d, %r12d
+; CHECK-APPLE-NEXT:    callq _params_in_reg2
+; CHECK-APPLE-NEXT:    movq %rbp, %rdi
+; CHECK-APPLE-NEXT:    movq %rbx, %rsi
+; CHECK-APPLE-NEXT:    movq %r15, %rdx
+; CHECK-APPLE-NEXT:    movq %r14, %rcx
+; CHECK-APPLE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Reload
+; CHECK-APPLE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 ## 8-byte Reload
+; CHECK-APPLE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 ## 8-byte Reload
+; CHECK-APPLE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 ## 8-byte Reload
+; CHECK-APPLE-NEXT:    callq _params_in_reg2
+; CHECK-APPLE-NEXT:    addq $48, %rsp
+; CHECK-APPLE-NEXT:    popq %rbx
+; CHECK-APPLE-NEXT:    popq %r13
+; CHECK-APPLE-NEXT:    popq %r14
+; CHECK-APPLE-NEXT:    popq %r15
+; CHECK-APPLE-NEXT:    popq %rbp
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: params_in_reg:
+; CHECK-O0:       ## %bb.0:
+; CHECK-O0-NEXT:    pushq %r13
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    subq $80, %rsp
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-O0-NEXT:    .cfi_offset %r13, -16
+; CHECK-O0-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    ## implicit-def: $rax
+; CHECK-O0-NEXT:    xorl %eax, %eax
+; CHECK-O0-NEXT:    movl %eax, %r12d
+; CHECK-O0-NEXT:    movl $1, %edi
+; CHECK-O0-NEXT:    movl $2, %esi
+; CHECK-O0-NEXT:    movl $3, %edx
+; CHECK-O0-NEXT:    movl $4, %ecx
+; CHECK-O0-NEXT:    movl $5, %r8d
+; CHECK-O0-NEXT:    movl $6, %r9d
+; CHECK-O0-NEXT:    movq %r12, %r13
+; CHECK-O0-NEXT:    callq _params_in_reg2
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 ## 8-byte Reload
+; CHECK-O0-NEXT:    movq %r12, %rax
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 ## 8-byte Reload
+; CHECK-O0-NEXT:    callq _params_in_reg2
+; CHECK-O0-NEXT:    addq $80, %rsp
+; CHECK-O0-NEXT:    popq %r13
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: params_in_reg:
+; CHECK-i386:       ## %bb.0:
+; CHECK-i386-NEXT:    pushl %ebp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-i386-NEXT:    pushl %ebx
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-i386-NEXT:    pushl %edi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i386-NEXT:    pushl %esi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 20
+; CHECK-i386-NEXT:    subl $60, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-i386-NEXT:    .cfi_offset %esi, -20
+; CHECK-i386-NEXT:    .cfi_offset %edi, -16
+; CHECK-i386-NEXT:    .cfi_offset %ebx, -12
+; CHECK-i386-NEXT:    .cfi_offset %ebp, -8
+; CHECK-i386-NEXT:    movl $0, 56(%esp)
+; CHECK-i386-NEXT:    movl 120(%esp), %ebx
+; CHECK-i386-NEXT:    movl 124(%esp), %ebp
+; CHECK-i386-NEXT:    movl 128(%esp), %esi
+; CHECK-i386-NEXT:    movl 132(%esp), %edi
+; CHECK-i386-NEXT:    leal 56(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 52(%esp)
+; CHECK-i386-NEXT:    movl $0, 48(%esp)
+; CHECK-i386-NEXT:    movl $0, 44(%esp)
+; CHECK-i386-NEXT:    movl $6, 40(%esp)
+; CHECK-i386-NEXT:    movl $0, 36(%esp)
+; CHECK-i386-NEXT:    movl $5, 32(%esp)
+; CHECK-i386-NEXT:    movl $0, 28(%esp)
+; CHECK-i386-NEXT:    movl $4, 24(%esp)
+; CHECK-i386-NEXT:    movl $0, 20(%esp)
+; CHECK-i386-NEXT:    movl $3, 16(%esp)
+; CHECK-i386-NEXT:    movl $0, 12(%esp)
+; CHECK-i386-NEXT:    movl $2, 8(%esp)
+; CHECK-i386-NEXT:    movl $0, 4(%esp)
+; CHECK-i386-NEXT:    movl $1, (%esp)
+; CHECK-i386-NEXT:    calll _params_in_reg2
+; CHECK-i386-NEXT:    movl %edi, 52(%esp)
+; CHECK-i386-NEXT:    movl %esi, 48(%esp)
+; CHECK-i386-NEXT:    movl %ebp, 44(%esp)
+; CHECK-i386-NEXT:    movl %ebx, 40(%esp)
+; CHECK-i386-NEXT:    movl 116(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 36(%esp)
+; CHECK-i386-NEXT:    movl 112(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 32(%esp)
+; CHECK-i386-NEXT:    movl 108(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 28(%esp)
+; CHECK-i386-NEXT:    movl 104(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 24(%esp)
+; CHECK-i386-NEXT:    movl 100(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 20(%esp)
+; CHECK-i386-NEXT:    movl 96(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 16(%esp)
+; CHECK-i386-NEXT:    movl 92(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 12(%esp)
+; CHECK-i386-NEXT:    movl 88(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 8(%esp)
+; CHECK-i386-NEXT:    movl 84(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 4(%esp)
+; CHECK-i386-NEXT:    movl 80(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, (%esp)
+; CHECK-i386-NEXT:    calll _params_in_reg2
+; CHECK-i386-NEXT:    addl $60, %esp
+; CHECK-i386-NEXT:    popl %esi
+; CHECK-i386-NEXT:    popl %edi
+; CHECK-i386-NEXT:    popl %ebx
+; CHECK-i386-NEXT:    popl %ebp
+; CHECK-i386-NEXT:    retl
   %error_ptr_ref = alloca swifterror %swift_error*, align 8
   store %swift_error* null, %swift_error** %error_ptr_ref
   call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i8* swiftself null, %swift_error** nocapture swifterror %error_ptr_ref)
@@ -581,76 +1567,264 @@ define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i8* swiftself,
 }
 declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err)
 
-; CHECK-APPLE-LABEL: params_and_return_in_reg
-; CHECK-APPLE:  pushq   %rbp
-; CHECK-APPLE:  pushq   %r15
-; CHECK-APPLE:  pushq   %r14
-; CHECK-APPLE:  pushq   %r13
-; CHECK-APPLE:  pushq   %rbx
-; CHECK-APPLE:  subq    $48, %rsp
-; Store arguments.
-; CHECK-APPLE:  movq    %r12, %r14
-; CHECK-APPLE:  movq    %r13, (%rsp)
-; CHECK-APPLE:  movq    %r9, 32(%rsp)
-; CHECK-APPLE:  movq    %r8, 24(%rsp)
-; CHECK-APPLE:  movq    %rcx, 16(%rsp)
-; CHECK-APPLE:  movq    %rdx, %r15
-; CHECK-APPLE:  movq    %rsi, %rbx
-; CHECK-APPLE:  movq    %rdi, %rbp
-; Setup call that clobbers all argument registers.
-; CHECK-APPLE:  movl    $1, %edi
-; CHECK-APPLE:  movl    $2, %esi
-; CHECK-APPLE:  movl    $3, %edx
-; CHECK-APPLE:  movl    $4, %ecx
-; CHECK-APPLE:  movl    $5, %r8d
-; CHECK-APPLE:  movl    $6, %r9d
-; CHECK-APPLE:  xorl    %r13d, %r13d
-; CHECK-APPLE:  xorl    %r12d, %r12d
-; CHECK-APPLE:  callq   _params_in_reg2
-; Store error_ptr_ref for later use.
-; CHECK-APPLE:  movq    %r12, 8(%rsp)
-; Restore original arguments.
-; CHECK-APPLE:  movq    %rbp, %rdi
-; CHECK-APPLE:  movq    %rbx, %rsi
-; CHECK-APPLE:  movq    %r15, %rdx
-; CHECK-APPLE:  movq    16(%rsp), %rcx
-; CHECK-APPLE:  movq    24(%rsp), %r8
-; CHECK-APPLE:  movq    32(%rsp), %r9
-; CHECK-APPLE:  movq    (%rsp), %r13
-; CHECK-APPLE:  movq    %r14, %r12
-; CHECK-APPLE:  callq   _params_and_return_in_reg2
-; Store return values in callee saved registers.
-; CHECK-APPLE:  movq    %rax, %rbx
-; CHECK-APPLE:  movq    %rdx, %rbp
-; CHECK-APPLE:  movq    %rcx, %r15
-; CHECK-APPLE:  movq    %r8, %r14
-; Store the swifterror return value (%err).
-; CHECK-APPLE:  movq    %r12, (%rsp)
-; Setup call.
-; CHECK-APPLE:  movl    $1, %edi
-; CHECK-APPLE:  movl    $2, %esi
-; CHECK-APPLE:  movl    $3, %edx
-; CHECK-APPLE:  movl    $4, %ecx
-; CHECK-APPLE:  movl    $5, %r8d
-; CHECK-APPLE:  movl    $6, %r9d
-; CHECK-APPLE:  xorl    %r13d, %r13d
-; Restore the swifterror value of error_ptr_ref.
-; CHECK-APPLE:  movq    8(%rsp), %r12
-; CHECK-APPLE:  callq   _params_in_reg2
-; Restore the return values of _params_and_return_in_reg2.
-; CHECK-APPLE:  movq    %rbx, %rax
-; CHECK-APPLE:  movq    %rbp, %rdx
-; CHECK-APPLE:  movq    %r15, %rcx
-; CHECK-APPLE:  movq    %r14, %r8
-; Restore the swiferror value of err.
-; CHECK-APPLE:  movq    (%rsp), %r12
-; CHECK-APPLE:  addq    $48, %rsp
-; CHECK-APPLE:  popq    %rbx
-; CHECK-APPLE:  popq    %r13
-; CHECK-APPLE:  popq    %r14
-; CHECK-APPLE:  popq    %r15
-; CHECK-APPLE:  popq    %rbp
 define swiftcc { i64, i64, i64, i64} @params_and_return_in_reg(i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err) {
+; CHECK-APPLE-LABEL: params_and_return_in_reg:
+; CHECK-APPLE:       ## %bb.0:
+; CHECK-APPLE-NEXT:    pushq %rbp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    pushq %r15
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-APPLE-NEXT:    pushq %r14
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-APPLE-NEXT:    pushq %r13
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-APPLE-NEXT:    pushq %rbx
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-APPLE-NEXT:    subq $48, %rsp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-APPLE-NEXT:    .cfi_offset %rbx, -48
+; CHECK-APPLE-NEXT:    .cfi_offset %r13, -40
+; CHECK-APPLE-NEXT:    .cfi_offset %r14, -32
+; CHECK-APPLE-NEXT:    .cfi_offset %r15, -24
+; CHECK-APPLE-NEXT:    .cfi_offset %rbp, -16
+; CHECK-APPLE-NEXT:    movq %r12, %r14
+; CHECK-APPLE-NEXT:    movq %r13, (%rsp) ## 8-byte Spill
+; CHECK-APPLE-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-APPLE-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-APPLE-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-APPLE-NEXT:    movq %rdx, %r15
+; CHECK-APPLE-NEXT:    movq %rsi, %rbx
+; CHECK-APPLE-NEXT:    movq %rdi, %rbp
+; CHECK-APPLE-NEXT:    movl $1, %edi
+; CHECK-APPLE-NEXT:    movl $2, %esi
+; CHECK-APPLE-NEXT:    movl $3, %edx
+; CHECK-APPLE-NEXT:    movl $4, %ecx
+; CHECK-APPLE-NEXT:    movl $5, %r8d
+; CHECK-APPLE-NEXT:    movl $6, %r9d
+; CHECK-APPLE-NEXT:    xorl %r13d, %r13d
+; CHECK-APPLE-NEXT:    xorl %r12d, %r12d
+; CHECK-APPLE-NEXT:    callq _params_in_reg2
+; CHECK-APPLE-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-APPLE-NEXT:    movq %rbp, %rdi
+; CHECK-APPLE-NEXT:    movq %rbx, %rsi
+; CHECK-APPLE-NEXT:    movq %r15, %rdx
+; CHECK-APPLE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
+; CHECK-APPLE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Reload
+; CHECK-APPLE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 ## 8-byte Reload
+; CHECK-APPLE-NEXT:    movq (%rsp), %r13 ## 8-byte Reload
+; CHECK-APPLE-NEXT:    movq %r14, %r12
+; CHECK-APPLE-NEXT:    callq _params_and_return_in_reg2
+; CHECK-APPLE-NEXT:    movq %rax, %rbx
+; CHECK-APPLE-NEXT:    movq %rdx, %rbp
+; CHECK-APPLE-NEXT:    movq %rcx, %r15
+; CHECK-APPLE-NEXT:    movq %r8, %r14
+; CHECK-APPLE-NEXT:    movq %r12, (%rsp) ## 8-byte Spill
+; CHECK-APPLE-NEXT:    movl $1, %edi
+; CHECK-APPLE-NEXT:    movl $2, %esi
+; CHECK-APPLE-NEXT:    movl $3, %edx
+; CHECK-APPLE-NEXT:    movl $4, %ecx
+; CHECK-APPLE-NEXT:    movl $5, %r8d
+; CHECK-APPLE-NEXT:    movl $6, %r9d
+; CHECK-APPLE-NEXT:    xorl %r13d, %r13d
+; CHECK-APPLE-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 ## 8-byte Reload
+; CHECK-APPLE-NEXT:    callq _params_in_reg2
+; CHECK-APPLE-NEXT:    movq %rbx, %rax
+; CHECK-APPLE-NEXT:    movq %rbp, %rdx
+; CHECK-APPLE-NEXT:    movq %r15, %rcx
+; CHECK-APPLE-NEXT:    movq %r14, %r8
+; CHECK-APPLE-NEXT:    movq (%rsp), %r12 ## 8-byte Reload
+; CHECK-APPLE-NEXT:    addq $48, %rsp
+; CHECK-APPLE-NEXT:    popq %rbx
+; CHECK-APPLE-NEXT:    popq %r13
+; CHECK-APPLE-NEXT:    popq %r14
+; CHECK-APPLE-NEXT:    popq %r15
+; CHECK-APPLE-NEXT:    popq %rbp
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: params_and_return_in_reg:
+; CHECK-O0:       ## %bb.0:
+; CHECK-O0-NEXT:    pushq %r13
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    subq $176, %rsp
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 192
+; CHECK-O0-NEXT:    .cfi_offset %r13, -16
+; CHECK-O0-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %r13, (%rsp) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    ## implicit-def: $rax
+; CHECK-O0-NEXT:    xorl %eax, %eax
+; CHECK-O0-NEXT:    movl %eax, %r12d
+; CHECK-O0-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movl $1, %edi
+; CHECK-O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movl $2, %esi
+; CHECK-O0-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movl $3, %edx
+; CHECK-O0-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movl $4, %ecx
+; CHECK-O0-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movl $5, %r8d
+; CHECK-O0-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movl $6, %r9d
+; CHECK-O0-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %r12, %r13
+; CHECK-O0-NEXT:    callq _params_in_reg2
+; CHECK-O0-NEXT:    movq (%rsp), %r13 ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 ## 8-byte Reload
+; CHECK-O0-NEXT:    movq %r12, %rax
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 ## 8-byte Reload
+; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    callq _params_and_return_in_reg2
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 ## 8-byte Reload
+; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %rdx, %rax
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
+; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %rcx, %rax
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
+; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %r8, %rax
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Reload
+; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    movq %r12, %rax
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 ## 8-byte Reload
+; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    callq _params_in_reg2
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Reload
+; CHECK-O0-NEXT:    movq %r12, %rsi
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 ## 8-byte Reload
+; CHECK-O0-NEXT:    addq $176, %rsp
+; CHECK-O0-NEXT:    popq %r13
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: params_and_return_in_reg:
+; CHECK-i386:       ## %bb.0:
+; CHECK-i386-NEXT:    pushl %ebp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-i386-NEXT:    pushl %ebx
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-i386-NEXT:    pushl %edi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i386-NEXT:    pushl %esi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 20
+; CHECK-i386-NEXT:    subl $124, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 144
+; CHECK-i386-NEXT:    .cfi_offset %esi, -20
+; CHECK-i386-NEXT:    .cfi_offset %edi, -16
+; CHECK-i386-NEXT:    .cfi_offset %ebx, -12
+; CHECK-i386-NEXT:    .cfi_offset %ebp, -8
+; CHECK-i386-NEXT:    movl $0, 64(%esp)
+; CHECK-i386-NEXT:    movl 188(%esp), %ebp
+; CHECK-i386-NEXT:    movl 192(%esp), %ebx
+; CHECK-i386-NEXT:    movl 196(%esp), %edi
+; CHECK-i386-NEXT:    movl 200(%esp), %esi
+; CHECK-i386-NEXT:    leal 64(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 52(%esp)
+; CHECK-i386-NEXT:    movl $0, 48(%esp)
+; CHECK-i386-NEXT:    movl $0, 44(%esp)
+; CHECK-i386-NEXT:    movl $6, 40(%esp)
+; CHECK-i386-NEXT:    movl $0, 36(%esp)
+; CHECK-i386-NEXT:    movl $5, 32(%esp)
+; CHECK-i386-NEXT:    movl $0, 28(%esp)
+; CHECK-i386-NEXT:    movl $4, 24(%esp)
+; CHECK-i386-NEXT:    movl $0, 20(%esp)
+; CHECK-i386-NEXT:    movl $3, 16(%esp)
+; CHECK-i386-NEXT:    movl $0, 12(%esp)
+; CHECK-i386-NEXT:    movl $2, 8(%esp)
+; CHECK-i386-NEXT:    movl $0, 4(%esp)
+; CHECK-i386-NEXT:    movl $1, (%esp)
+; CHECK-i386-NEXT:    calll _params_in_reg2
+; CHECK-i386-NEXT:    movl %esi, 56(%esp)
+; CHECK-i386-NEXT:    movl %edi, 52(%esp)
+; CHECK-i386-NEXT:    movl %ebx, 48(%esp)
+; CHECK-i386-NEXT:    movl %ebp, 44(%esp)
+; CHECK-i386-NEXT:    movl 184(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 40(%esp)
+; CHECK-i386-NEXT:    movl 180(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 36(%esp)
+; CHECK-i386-NEXT:    movl 176(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 32(%esp)
+; CHECK-i386-NEXT:    movl 172(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 28(%esp)
+; CHECK-i386-NEXT:    movl 168(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 24(%esp)
+; CHECK-i386-NEXT:    movl 164(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 20(%esp)
+; CHECK-i386-NEXT:    movl 160(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 16(%esp)
+; CHECK-i386-NEXT:    movl 156(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 12(%esp)
+; CHECK-i386-NEXT:    movl 152(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 8(%esp)
+; CHECK-i386-NEXT:    movl 148(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 4(%esp)
+; CHECK-i386-NEXT:    leal 88(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, (%esp)
+; CHECK-i386-NEXT:    calll _params_and_return_in_reg2
+; CHECK-i386-NEXT:    subl $4, %esp
+; CHECK-i386-NEXT:    movl 88(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-i386-NEXT:    movl 92(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-i386-NEXT:    movl 96(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-i386-NEXT:    movl 100(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-i386-NEXT:    movl 104(%esp), %ebp
+; CHECK-i386-NEXT:    movl 108(%esp), %edi
+; CHECK-i386-NEXT:    movl 112(%esp), %ebx
+; CHECK-i386-NEXT:    movl 116(%esp), %esi
+; CHECK-i386-NEXT:    leal 64(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, 52(%esp)
+; CHECK-i386-NEXT:    movl $0, 48(%esp)
+; CHECK-i386-NEXT:    movl $0, 44(%esp)
+; CHECK-i386-NEXT:    movl $6, 40(%esp)
+; CHECK-i386-NEXT:    movl $0, 36(%esp)
+; CHECK-i386-NEXT:    movl $5, 32(%esp)
+; CHECK-i386-NEXT:    movl $0, 28(%esp)
+; CHECK-i386-NEXT:    movl $4, 24(%esp)
+; CHECK-i386-NEXT:    movl $0, 20(%esp)
+; CHECK-i386-NEXT:    movl $3, 16(%esp)
+; CHECK-i386-NEXT:    movl $0, 12(%esp)
+; CHECK-i386-NEXT:    movl $2, 8(%esp)
+; CHECK-i386-NEXT:    movl $0, 4(%esp)
+; CHECK-i386-NEXT:    movl $1, (%esp)
+; CHECK-i386-NEXT:    calll _params_in_reg2
+; CHECK-i386-NEXT:    movl 144(%esp), %eax
+; CHECK-i386-NEXT:    movl %esi, 28(%eax)
+; CHECK-i386-NEXT:    movl %ebx, 24(%eax)
+; CHECK-i386-NEXT:    movl %edi, 20(%eax)
+; CHECK-i386-NEXT:    movl %ebp, 16(%eax)
+; CHECK-i386-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; CHECK-i386-NEXT:    movl %ecx, 12(%eax)
+; CHECK-i386-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; CHECK-i386-NEXT:    movl %ecx, 8(%eax)
+; CHECK-i386-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; CHECK-i386-NEXT:    movl %ecx, 4(%eax)
+; CHECK-i386-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; CHECK-i386-NEXT:    movl %ecx, (%eax)
+; CHECK-i386-NEXT:    addl $124, %esp
+; CHECK-i386-NEXT:    popl %esi
+; CHECK-i386-NEXT:    popl %edi
+; CHECK-i386-NEXT:    popl %ebx
+; CHECK-i386-NEXT:    popl %ebp
+; CHECK-i386-NEXT:    retl $4
   %error_ptr_ref = alloca swifterror %swift_error*, align 8
   store %swift_error* null, %swift_error** %error_ptr_ref
   call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i8* swiftself null, %swift_error** nocapture swifterror %error_ptr_ref)
@@ -666,11 +1840,39 @@ declare void @acallee(i8*)
 
 ; Make sure we don't tail call if the caller returns a swifterror value. We
 ; would have to move into the swifterror register before the tail call.
-; CHECK-APPLE: tailcall_from_swifterror:
-; CHECK-APPLE-NOT: jmp _acallee
-; CHECK-APPLE: callq _acallee
-
 define swiftcc void @tailcall_from_swifterror(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-APPLE-LABEL: tailcall_from_swifterror:
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %rbx
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    .cfi_offset %rbx, -16
+; CHECK-APPLE-NEXT:    movq %r12, %rbx
+; CHECK-APPLE-NEXT:    xorl %edi, %edi
+; CHECK-APPLE-NEXT:    callq _acallee
+; CHECK-APPLE-NEXT:    movq %rbx, %r12
+; CHECK-APPLE-NEXT:    popq %rbx
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: tailcall_from_swifterror:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    movq %r12, (%rsp) ## 8-byte Spill
+; CHECK-O0-NEXT:    xorl %eax, %eax
+; CHECK-O0-NEXT:    movl %eax, %edi
+; CHECK-O0-NEXT:    callq _acallee
+; CHECK-O0-NEXT:    movq (%rsp), %r12 ## 8-byte Reload
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: tailcall_from_swifterror:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    subl $12, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i386-NEXT:    movl $0, (%esp)
+; CHECK-i386-NEXT:    calll _acallee
+; CHECK-i386-NEXT:    addl $12, %esp
+; CHECK-i386-NEXT:    retl
 entry:
   tail call void @acallee(i8* null)
   ret void
@@ -687,6 +1889,42 @@ declare hidden swiftcc i8* @testFunA()
 %TSb = type <{ i1 }>
 
 define swiftcc void @dontCrash()  {
+; CHECK-APPLE-LABEL: dontCrash:
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %rax
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    callq _testFunA
+; CHECK-APPLE-NEXT:    cmpb $1, (%rax)
+; CHECK-APPLE-NEXT:    popq %rax
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: dontCrash:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    ## implicit-def: $rax
+; CHECK-O0-NEXT:    xorl %eax, %eax
+; CHECK-O0-NEXT:    ## kill: def $rax killed $eax
+; CHECK-O0-NEXT:    callq _testFunA
+; CHECK-O0-NEXT:    testb $1, (%rax)
+; CHECK-O0-NEXT:    jne LBB21_1
+; CHECK-O0-NEXT:    jmp LBB21_2
+; CHECK-O0-NEXT:  LBB21_1: ## %trueBB
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+; CHECK-O0-NEXT:  LBB21_2: ## %falseBB
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: dontCrash:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    subl $12, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i386-NEXT:    movl $0, 8(%esp)
+; CHECK-i386-NEXT:    calll _testFunA
+; CHECK-i386-NEXT:    cmpb $1, (%eax)
+; CHECK-i386-NEXT:    addl $12, %esp
+; CHECK-i386-NEXT:    retl
 entry:
   %swifterror = alloca swifterror %swift_error*, align 8
   store %swift_error* null, %swift_error** %swifterror, align 8
@@ -707,26 +1945,50 @@ falseBB:
 declare swiftcc void @foo2(%swift_error** swifterror)
 
 ; Make sure we properly assign registers during fast-isel.
-; CHECK-O0-LABEL: testAssign
-; CHECK-O0:        pushq   %r12
-; CHECK-O0:        xorl    [[ZERO:%[a-z0-9]+]], [[ZERO]]
-; CHECK-O0:        movl    [[ZERO]], %r12d
-; CHECK-O0:        callq   _foo2
-; CHECK-O0:        movq    %r12, [[SLOT:[-a-z0-9\(\)\%]*]]
-;
-; CHECK-O0:        movq    [[SLOT]], %rax
-; CHECK-O0:        popq    %r12
-; CHECK-O0:        retq
-
-; CHECK-APPLE-LABEL: testAssign
-; CHECK-APPLE:        pushq   %r12
-; CHECK-APPLE:        xorl    %r12d, %r12d
-; CHECK-APPLE:        callq   _foo2
-; CHECK-APPLE:        movq    %r12, %rax
-; CHECK-APPLE:        popq    %r12
-; CHECK-APPLE:        retq
-
 define swiftcc %swift_error* @testAssign(i8* %error_ref) {
+; CHECK-APPLE-LABEL: testAssign:
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %r12
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    subq $16, %rsp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-APPLE-NEXT:    .cfi_offset %r12, -16
+; CHECK-APPLE-NEXT:    xorl %r12d, %r12d
+; CHECK-APPLE-NEXT:    callq _foo2
+; CHECK-APPLE-NEXT:    movq %r12, %rax
+; CHECK-APPLE-NEXT:    addq $16, %rsp
+; CHECK-APPLE-NEXT:    popq %r12
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: testAssign:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %r12
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    subq $16, %rsp
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-O0-NEXT:    .cfi_offset %r12, -16
+; CHECK-O0-NEXT:    ## implicit-def: $rax
+; CHECK-O0-NEXT:    xorl %eax, %eax
+; CHECK-O0-NEXT:    movl %eax, %r12d
+; CHECK-O0-NEXT:    callq _foo2
+; CHECK-O0-NEXT:    movq %r12, (%rsp) ## 8-byte Spill
+; CHECK-O0-NEXT:  ## %bb.1: ## %a
+; CHECK-O0-NEXT:    movq (%rsp), %rax ## 8-byte Reload
+; CHECK-O0-NEXT:    addq $16, %rsp
+; CHECK-O0-NEXT:    popq %r12
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: testAssign:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    subl $12, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i386-NEXT:    movl $0, 8(%esp)
+; CHECK-i386-NEXT:    leal 8(%esp), %eax
+; CHECK-i386-NEXT:    movl %eax, (%esp)
+; CHECK-i386-NEXT:    calll _foo2
+; CHECK-i386-NEXT:    movl 8(%esp), %eax
+; CHECK-i386-NEXT:    addl $12, %esp
+; CHECK-i386-NEXT:    retl
 entry:
   %error_ptr = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr
@@ -738,17 +2000,26 @@ a:
   ret %swift_error* %error
 }
 
-; CHECK-O0-LABEL: testAssign2
-; CHECK-O0:        movq    %r12, [[SLOT:[-a-z0-9\(\)\%]*]]
-; CHECK-O0:        jmp
-; CHECK-O0:        movq    [[SLOT]], %r12
-; CHECK-O0-NEXT:   movq    %r12, %rax
-; CHECK-O0-NEXT:   retq
-
-; CHECK-APPLE-LABEL: testAssign2
-; CHECK-APPLE:        movq    %r12, %rax
-; CHECK-APPLE:        retq
 define swiftcc %swift_error* @testAssign2(i8* %error_ref, %swift_error** swifterror %err) {
+; CHECK-APPLE-LABEL: testAssign2:
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    movq %r12, %rax
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: testAssign2:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-O0-NEXT:    jmp LBB23_1
+; CHECK-O0-NEXT:  LBB23_1: ## %a
+; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 ## 8-byte Reload
+; CHECK-O0-NEXT:    movq %r12, %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: testAssign2:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    movl 8(%esp), %eax
+; CHECK-i386-NEXT:    movl (%eax), %eax
+; CHECK-i386-NEXT:    retl
 entry:
   br label %a
 
@@ -757,20 +2028,42 @@ a:
   ret %swift_error* %error
 }
 
-; CHECK-O0-LABEL: testAssign3
-; CHECK-O0:        callq   _foo2
-; CHECK-O0:        movq    %r12, [[SLOT:[-a-z0-9\(\)\%]*]]
-; CHECK-O0:        movq    [[SLOT]], %r12
-; CHECK-O0-NEXT:   movq    %r12, %rax
-; CHECK-O0-NEXT:   popq    %rcx
-; CHECK-O0-NEXT:   retq
-
-; CHECK-APPLE-LABEL: testAssign3
-; CHECK-APPLE:         callq   _foo2
-; CHECK-APPLE:         movq    %r12, %rax
-; CHECK-APPLE:         retq
-
 define swiftcc %swift_error* @testAssign3(i8* %error_ref, %swift_error** swifterror %err) {
+; CHECK-APPLE-LABEL: testAssign3:
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %rax
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    callq _foo2
+; CHECK-APPLE-NEXT:    movq %r12, %rax
+; CHECK-APPLE-NEXT:    popq %rcx
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: testAssign3:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    callq _foo2
+; CHECK-O0-NEXT:    movq %r12, (%rsp) ## 8-byte Spill
+; CHECK-O0-NEXT:  ## %bb.1: ## %a
+; CHECK-O0-NEXT:    movq (%rsp), %r12 ## 8-byte Reload
+; CHECK-O0-NEXT:    movq %r12, %rax
+; CHECK-O0-NEXT:    popq %rcx
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: testAssign3:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    pushl %esi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-i386-NEXT:    subl $8, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i386-NEXT:    .cfi_offset %esi, -8
+; CHECK-i386-NEXT:    movl 20(%esp), %esi
+; CHECK-i386-NEXT:    movl %esi, (%esp)
+; CHECK-i386-NEXT:    calll _foo2
+; CHECK-i386-NEXT:    movl (%esi), %eax
+; CHECK-i386-NEXT:    addl $8, %esp
+; CHECK-i386-NEXT:    popl %esi
+; CHECK-i386-NEXT:    retl
 entry:
   call swiftcc void @foo2(%swift_error** swifterror %err)
   br label %a
@@ -780,24 +2073,46 @@ a:
   ret %swift_error* %error
 }
 
-
-; CHECK-O0-LABEL: testAssign4
-; CHECK-O0:        callq   _foo2
-; CHECK-O0:        xorl    %eax, %eax
-; CHECK-O0: ## kill: def $rax killed $eax
-; CHECK-O0:        movq    %rax, [[SLOT:[-a-z0-9\(\)\%]*]]
-; CHECK-O0:        movq    [[SLOT]], %r12
-; CHECK-O0-NEXT:   movq    %r12, %rax
-; CHECK-O0-NEXT:   popq    %rcx
-; CHECK-O0-NEXT:   retq
-
-; CHECK-APPLE-LABEL: testAssign4
-; CHECK-APPLE:        callq   _foo2
-; CHECK-APPLE:        xorl    %eax, %eax
-; CHECK-APPLE:        xorl    %r12d, %r12d
-; CHECK-APPLE:        retq
-
 define swiftcc %swift_error* @testAssign4(i8* %error_ref, %swift_error** swifterror %err) {
+; CHECK-APPLE-LABEL: testAssign4:
+; CHECK-APPLE:       ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT:    pushq %rax
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    callq _foo2
+; CHECK-APPLE-NEXT:    xorl %eax, %eax
+; CHECK-APPLE-NEXT:    xorl %r12d, %r12d
+; CHECK-APPLE-NEXT:    popq %rcx
+; CHECK-APPLE-NEXT:    retq
+;
+; CHECK-O0-LABEL: testAssign4:
+; CHECK-O0:       ## %bb.0: ## %entry
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    callq _foo2
+; CHECK-O0-NEXT:    xorl %eax, %eax
+; CHECK-O0-NEXT:    ## kill: def $rax killed $eax
+; CHECK-O0-NEXT:    movq %rax, (%rsp) ## 8-byte Spill
+; CHECK-O0-NEXT:  ## %bb.1: ## %a
+; CHECK-O0-NEXT:    movq (%rsp), %r12 ## 8-byte Reload
+; CHECK-O0-NEXT:    movq %r12, %rax
+; CHECK-O0-NEXT:    popq %rcx
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-i386-LABEL: testAssign4:
+; CHECK-i386:       ## %bb.0: ## %entry
+; CHECK-i386-NEXT:    pushl %esi
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-i386-NEXT:    subl $8, %esp
+; CHECK-i386-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i386-NEXT:    .cfi_offset %esi, -8
+; CHECK-i386-NEXT:    movl 20(%esp), %esi
+; CHECK-i386-NEXT:    movl %esi, (%esp)
+; CHECK-i386-NEXT:    calll _foo2
+; CHECK-i386-NEXT:    movl $0, (%esi)
+; CHECK-i386-NEXT:    movl (%esi), %eax
+; CHECK-i386-NEXT:    addl $8, %esp
+; CHECK-i386-NEXT:    popl %esi
+; CHECK-i386-NEXT:    retl
 entry:
   call swiftcc void @foo2(%swift_error** swifterror %err)
   store %swift_error* null, %swift_error** %err


        


More information about the llvm-commits mailing list