[llvm] 3c94869 - [NFC][ARM] Fix update_llc_test_checks for aarch64-apple-ios/thumbv7s-apple-darwin, autogenerate a few tests

Roman Lebedev via llvm-commits <llvm-commits at lists.llvm.org>
Wed Jun 23 06:34:47 PDT 2021


Author: Roman Lebedev
Date: 2021-06-23T16:31:19+03:00
New Revision: 3c94869632d3f762c1699d4d920c1ac2721b95c0

URL: https://github.com/llvm/llvm-project/commit/3c94869632d3f762c1699d4d920c1ac2721b95c0
DIFF: https://github.com/llvm/llvm-project/commit/3c94869632d3f762c1699d4d920c1ac2721b95c0.diff

LOG: [NFC][ARM] Fix update_llc_test_checks for aarch64-apple-ios/thumbv7s-apple-darwin, autogenerate a few tests
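
For readers unfamiliar with the mechanics: update_llc_test_checks.py chooses
an assembly scrubber and a function-body regex by longest-prefix match on the
target triple of each RUN line. Before this patch, the two triples in the
title apparently fell through to the generic entries, whose ELF-style regexes
cannot parse Darwin output (underscore-mangled symbols, ";" comments), so the
script could not generate checks for such tests. A minimal sketch of the idea
in Python follows; the asm.py hunk itself is not reproduced in this excerpt,
so the table entries below are an assumption modeled on triples the script
already handles:

    import re

    # Stand-in regexes only: the real ones in UpdateTestChecks/asm.py also
    # capture the whole function body, CFI directives, etc.
    ELF_RE    = re.compile(r'^(?P<func>[^ :_][^:]*):', re.M)  # matches "foo:"
    DARWIN_RE = re.compile(r'^_(?P<func>[^:]+):', re.M)       # matches "_foo:"

    TARGET_HANDLERS = {
        'aarch64':               ELF_RE,     # generic entry, wrong for Apple asm
        'aarch64-apple-ios':     DARWIN_RE,  # new: more specific, wins the match
        'thumbv7s-apple-darwin': DARWIN_RE,  # new: same fix for the ARM triple
    }

    def pick_handler(triple):
        # Longest matching prefix wins, mirroring asm.py's get_run_handler.
        best = max((p for p in TARGET_HANDLERS if triple.startswith(p)),
                   key=len, default=None)
        if best is None:
            raise KeyError('unsupported triple: %r' % triple)
        return TARGET_HANDLERS[best]

    assert pick_handler('aarch64-apple-ios') is DARWIN_RE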

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll
    llvm/test/CodeGen/AArch64/branch-relax-asm.ll
    llvm/test/CodeGen/AArch64/swifterror.ll
    llvm/test/CodeGen/ARM/ifcvt-iter-indbr.ll
    llvm/utils/UpdateTestChecks/asm.py

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll
index 678a9ece6fea0..f8e8beac38ac9 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -frame-pointer=all -global-isel < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck %s
 
 declare i8* @malloc(i64)
@@ -8,13 +9,20 @@ declare void @free(i8*)
 ; that takes a swifterror parameter and "caller" is the caller of "foo".
 define float @foo(%swift_error** swifterror %error_ptr_ref) {
 ; CHECK-LABEL: foo:
-; CHECK: mov w0, #16
-; CHECK: malloc
-; CHECK: mov [[ID:w[0-9]+]], #1
-; CHECK: strb [[ID]], [x0, #8]
-; CHECK: mov x21, x0
-; CHECK-NOT: x21
-
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    mov w0, #16
+; CHECK-NEXT:    bl _malloc
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    fmov s0, #1.00000000
+; CHECK-NEXT:    strb w8, [x0, #8]
+; CHECK-NEXT:    mov x21, x0
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %call = call i8* @malloc(i64 16)
   %call.0 = bitcast i8* %call to %swift_error*
@@ -26,16 +34,35 @@ entry:
 
 ; "caller" calls "foo" that takes a swifterror parameter.
 define float @caller(i8* %error_ref) {
-; CHECK-LABEL: caller:
-; CHECK: mov [[ID:x[0-9]+]], x0
-; CHECK: bl {{.*}}foo
-; CHECK: mov x0, x21
-; CHECK: cbnz x21
 ; Access part of the error object and save it to error_ref
-; CHECK: ldrb [[CODE:w[0-9]+]], [x0, #8]
-; CHECK: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK: bl {{.*}}free
-
+; CHECK-LABEL: caller:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #32 ; =32
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -24
+; CHECK-NEXT:    .cfi_offset w20, -32
+; CHECK-NEXT:    .cfi_offset w21, -40
+; CHECK-NEXT:    .cfi_offset w22, -48
+; CHECK-NEXT:    mov x19, x0
+; CHECK-NEXT:    mov x21, xzr
+; CHECK-NEXT:    bl _foo
+; CHECK-NEXT:    mov x0, x21
+; CHECK-NEXT:    cbnz x21, LBB1_2
+; CHECK-NEXT:  ; %bb.1: ; %cont
+; CHECK-NEXT:    ldrb w8, [x0, #8]
+; CHECK-NEXT:    strb w8, [x19]
+; CHECK-NEXT:  LBB1_2: ; %handler
+; CHECK-NEXT:    bl _free
+; CHECK-NEXT:    fmov s0, #1.00000000
+; CHECK-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp], #48 ; 16-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr_ref
@@ -56,20 +83,46 @@ handler:
 
 ; "caller2" is the caller of "foo", it calls "foo" inside a loop.
 define float @caller2(i8* %error_ref) {
-; CHECK-LABEL: caller2:
-; CHECK: mov [[ID:x[0-9]+]], x0
-; CHECK: fmov [[CMP:s[0-9]+]], #1.0
-; CHECK: mov x21, xzr
-; CHECK: bl {{.*}}foo
-; CHECK: cbnz x21
-; CHECK: fcmp s0, [[CMP]]
-; CHECK: b.le
 ; Access part of the error object and save it to error_ref
-; CHECK: ldrb [[CODE:w[0-9]+]], [x21, #8]
-; CHECK: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK: mov x0, x21
-; CHECK: bl {{.*}}free
-
+; CHECK-LABEL: caller2:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp d9, d8, [sp, #-64]! ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #48 ; =48
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -24
+; CHECK-NEXT:    .cfi_offset w20, -32
+; CHECK-NEXT:    .cfi_offset w21, -40
+; CHECK-NEXT:    .cfi_offset w22, -48
+; CHECK-NEXT:    .cfi_offset b8, -56
+; CHECK-NEXT:    .cfi_offset b9, -64
+; CHECK-NEXT:    mov x19, x0
+; CHECK-NEXT:    fmov s8, #1.00000000
+; CHECK-NEXT:  LBB2_1: ; %bb_loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    mov x21, xzr
+; CHECK-NEXT:    bl _foo
+; CHECK-NEXT:    cbnz x21, LBB2_4
+; CHECK-NEXT:  ; %bb.2: ; %cont
+; CHECK-NEXT:    ; in Loop: Header=BB2_1 Depth=1
+; CHECK-NEXT:    fcmp s0, s8
+; CHECK-NEXT:    b.le LBB2_1
+; CHECK-NEXT:  ; %bb.3: ; %bb_end
+; CHECK-NEXT:    ldrb w8, [x21, #8]
+; CHECK-NEXT:    strb w8, [x19]
+; CHECK-NEXT:  LBB2_4: ; %handler
+; CHECK-NEXT:    mov x0, x21
+; CHECK-NEXT:    bl _free
+; CHECK-NEXT:    fmov s0, #1.00000000
+; CHECK-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp], #64 ; 16-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   br label %bb_loop
@@ -97,15 +150,25 @@ handler:
 ; under a certain condition.
 define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
 ; CHECK-LABEL: foo_if:
-; CHECK: cbz w0
-; CHECK: mov w0, #16
-; CHECK: malloc
-; CHECK: mov [[ID:w[0-9]+]], #1
-; CHECK: strb [[ID]], [x0, #8]
-; CHECK: mov x21, x0
-; CHECK-NOT: x21
-; CHECK: ret
-
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    cbz w0, LBB3_2
+; CHECK-NEXT:  ; %bb.1: ; %gen_error
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    mov w0, #16
+; CHECK-NEXT:    bl _malloc
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    fmov s0, #1.00000000
+; CHECK-NEXT:    strb w8, [x0, #8]
+; CHECK-NEXT:    mov x21, x0
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB3_2: ; %normal
+; CHECK-NEXT:    movi d0, #0000000000000000
+; CHECK-NEXT:    ret
 entry:
   %cond = icmp ne i32 %cc, 0
   br i1 %cond, label %gen_error, label %normal
@@ -126,13 +189,43 @@ normal:
 ; under a certain condition inside a loop.
 define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float %cc2) {
 ; CHECK-LABEL: foo_loop:
-; CHECK: cbz
-; CHECK: mov w0, #16
-; CHECK: malloc
-; CHECK: mov x21, x0
-; CHECK: strb w{{.*}}, [x0, #8]
-; CHECK: ret
-
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp d9, d8, [sp, #-48]! ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #32 ; =32
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -24
+; CHECK-NEXT:    .cfi_offset w20, -32
+; CHECK-NEXT:    .cfi_offset b8, -40
+; CHECK-NEXT:    .cfi_offset b9, -48
+; CHECK-NEXT:    mov w19, w0
+; CHECK-NEXT:    mov.16b v8, v0
+; CHECK-NEXT:    mov w20, #1
+; CHECK-NEXT:    fmov s9, #1.00000000
+; CHECK-NEXT:    b LBB4_2
+; CHECK-NEXT:  LBB4_1: ; %bb_cont
+; CHECK-NEXT:    ; in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    b.gt LBB4_4
+; CHECK-NEXT:  LBB4_2: ; %bb_loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    cbz w19, LBB4_1
+; CHECK-NEXT:  ; %bb.3: ; %gen_error
+; CHECK-NEXT:    ; in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT:    mov w0, #16
+; CHECK-NEXT:    bl _malloc
+; CHECK-NEXT:    mov x21, x0
+; CHECK-NEXT:    strb w20, [x0, #8]
+; CHECK-NEXT:    b LBB4_1
+; CHECK-NEXT:  LBB4_4: ; %bb_end
+; CHECK-NEXT:    movi d0, #0000000000000000
+; CHECK-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp], #48 ; 16-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   br label %bb_loop
 
@@ -161,15 +254,26 @@ bb_end:
 ; parameter.
 define void @foo_sret(%struct.S* sret(%struct.S) %agg.result, i32 %val1, %swift_error** swifterror %error_ptr_ref) {
 ; CHECK-LABEL: foo_sret:
-; CHECK: mov [[SRET:x[0-9]+]], x8
-; CHECK: mov w0, #16
-; CHECK: malloc
-; CHECK: mov [[ID:w[0-9]+]], #1
-; CHECK: strb [[ID]], [x0, #8]
-; CHECK: str w{{.*}}, [{{.*}}[[SRET]], #4]
-; CHECK: mov x21, x0
-; CHECK-NOT: x21
-
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #16 ; =16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -24
+; CHECK-NEXT:    .cfi_offset w20, -32
+; CHECK-NEXT:    mov x19, x8
+; CHECK-NEXT:    mov w20, w0
+; CHECK-NEXT:    mov w0, #16
+; CHECK-NEXT:    bl _malloc
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    strb w8, [x0, #8]
+; CHECK-NEXT:    str w20, [x19, #4]
+; CHECK-NEXT:    mov x21, x0
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %call = call i8* @malloc(i64 16)
   %call.0 = bitcast i8* %call to %swift_error*
@@ -183,17 +287,39 @@ entry:
 
 ; "caller3" calls "foo_sret" that takes a swifterror parameter.
 define float @caller3(i8* %error_ref) {
-; CHECK-LABEL: caller3:
-; CHECK: mov [[ID:x[0-9]+]], x0
-; CHECK: mov [[ZERO:x[0-9]+]], xzr
-; CHECK: bl {{.*}}foo_sret
-; CHECK: mov x0, x21
-; CHECK: cbnz x21
 ; Access part of the error object and save it to error_ref
-; CHECK: ldrb [[CODE:w[0-9]+]], [x0, #8]
-; CHECK: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK: bl {{.*}}free
-
+; CHECK-LABEL: caller3:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    sub sp, sp, #80 ; =80
+; CHECK-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #64 ; =64
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -24
+; CHECK-NEXT:    .cfi_offset w20, -32
+; CHECK-NEXT:    .cfi_offset w21, -40
+; CHECK-NEXT:    .cfi_offset w22, -48
+; CHECK-NEXT:    mov x19, x0
+; CHECK-NEXT:    add x8, sp, #8 ; =8
+; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:    mov x21, xzr
+; CHECK-NEXT:    bl _foo_sret
+; CHECK-NEXT:    mov x0, x21
+; CHECK-NEXT:    cbnz x21, LBB6_2
+; CHECK-NEXT:  ; %bb.1: ; %cont
+; CHECK-NEXT:    ldrb w8, [x0, #8]
+; CHECK-NEXT:    strb w8, [x19]
+; CHECK-NEXT:  LBB6_2: ; %handler
+; CHECK-NEXT:    bl _free
+; CHECK-NEXT:    fmov s0, #1.00000000
+; CHECK-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #80 ; =80
+; CHECK-NEXT:    ret
 entry:
   %s = alloca %struct.S, align 8
   %error_ptr_ref = alloca swifterror %swift_error*
@@ -217,21 +343,40 @@ handler:
 ; variable number of arguments.
 declare void @llvm.va_start(i8*) nounwind
 define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
-; CHECK-LABEL: foo_vararg:
-; CHECK: mov w0, #16
-; CHECK: malloc
-; CHECK-DAG: mov [[ID:w[0-9]+]], #1
-; CHECK-DAG: strb [[ID]], [x0, #8]
-
 ; First vararg
-; CHECK: ldr {{w[0-9]+}}, [x[[ARG1:[0-9]+]]], #8
 ; Second vararg
-; CHECK: ldr {{w[0-9]+}}, [x[[ARG1]]], #8
 ; Third vararg
-; CHECK: ldr {{w[0-9]+}}, [x[[ARG1]]], #8
-
-; CHECK: mov x21, x0
-; CHECK-NOT: x21
+; CHECK-LABEL: foo_vararg:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    sub sp, sp, #48 ; =48
+; CHECK-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #32 ; =32
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    mov w0, #16
+; CHECK-NEXT:    bl _malloc
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    sub x9, x29, #8 ; =8
+; CHECK-NEXT:    strb w8, [x0, #8]
+; CHECK-NEXT:    add x8, x29, #16 ; =16
+; CHECK-NEXT:    str x8, [x9]
+; CHECK-NEXT:    ldur x8, [x29, #-8]
+; CHECK-NEXT:    ldr w9, [x8], #8
+; CHECK-NEXT:    stur x8, [x29, #-8]
+; CHECK-NEXT:    stur w9, [x29, #-12]
+; CHECK-NEXT:    ldr w9, [x8], #8
+; CHECK-NEXT:    stur x8, [x29, #-8]
+; CHECK-NEXT:    str w9, [sp, #16]
+; CHECK-NEXT:    ldur x8, [x29, #-8]
+; CHECK-NEXT:    ldr w9, [x8], #8
+; CHECK-NEXT:    stur x8, [x29, #-8]
+; CHECK-NEXT:    fmov s0, #1.00000000
+; CHECK-NEXT:    str w9, [sp, #12]
+; CHECK-NEXT:    mov x21, x0
+; CHECK-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #48 ; =48
+; CHECK-NEXT:    ret
 entry:
   %call = call i8* @malloc(i64 16)
   %call.0 = bitcast i8* %call to %swift_error*
@@ -257,20 +402,47 @@ entry:
 
 ; "caller4" calls "foo_vararg" that takes a swifterror parameter.
 define float @caller4(i8* %error_ref) {
-; CHECK-LABEL: caller4:
-
-; CHECK: mov [[ID:x[0-9]+]], x0
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
-; CHECK: str {{x[0-9]+}}, [sp, #16]
-; CHECK: mov x21, xzr
-
-; CHECK: bl {{.*}}foo_vararg
-; CHECK: mov x0, x21
-; CHECK: cbnz x21
 ; Access part of the error object and save it to error_ref
-; CHECK: ldrb [[CODE:w[0-9]+]], [x0, #8]
-; CHECK: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK: bl {{.*}}free
+; CHECK-LABEL: caller4:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    sub sp, sp, #96 ; =96
+; CHECK-NEXT:    stp x22, x21, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #80 ; =80
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -24
+; CHECK-NEXT:    .cfi_offset w20, -32
+; CHECK-NEXT:    .cfi_offset w21, -40
+; CHECK-NEXT:    .cfi_offset w22, -48
+; CHECK-NEXT:    mov w8, #10
+; CHECK-NEXT:    mov w9, #11
+; CHECK-NEXT:    mov w10, #12
+; CHECK-NEXT:    stur w8, [x29, #-36]
+; CHECK-NEXT:    stp w10, w9, [sp, #36]
+; CHECK-NEXT:    mov w8, w8
+; CHECK-NEXT:    mov w9, w9
+; CHECK-NEXT:    mov w10, w10
+; CHECK-NEXT:    mov x19, x0
+; CHECK-NEXT:    stp x8, x9, [sp]
+; CHECK-NEXT:    str x10, [sp, #16]
+; CHECK-NEXT:    mov x21, xzr
+; CHECK-NEXT:    bl _foo_vararg
+; CHECK-NEXT:    mov x0, x21
+; CHECK-NEXT:    cbnz x21, LBB8_2
+; CHECK-NEXT:  ; %bb.1: ; %cont
+; CHECK-NEXT:    ldrb w8, [x0, #8]
+; CHECK-NEXT:    strb w8, [x19]
+; CHECK-NEXT:  LBB8_2: ; %handler
+; CHECK-NEXT:    bl _free
+; CHECK-NEXT:    fmov s0, #1.00000000
+; CHECK-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #96 ; =96
+; CHECK-NEXT:    ret
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr_ref
@@ -303,73 +475,104 @@ handler:
 
 ; Check that we don't blow up on tail calling swifterror argument functions.
 define float @tailcallswifterror(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-LABEL: tailcallswifterror:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    bl _tailcallswifterror
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %0 = tail call float @tailcallswifterror(%swift_error** swifterror %error_ptr_ref)
   ret float %0
 }
 define swiftcc float @tailcallswifterror_swiftcc(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-LABEL: tailcallswifterror_swiftcc:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    bl _tailcallswifterror_swiftcc
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %0 = tail call swiftcc float @tailcallswifterror_swiftcc(%swift_error** swifterror %error_ptr_ref)
   ret float %0
 }
 
-; CHECK-LABEL: params_in_reg
 ; Save callee saved registers and swifterror since it will be clobbered by the first call to params_in_reg2.
-; CHECK:  stp     x28, x0, [sp
-; CHECK:  stp     x27, x26, [sp
-; CHECK:  stp     x25, x24, [sp
-; CHECK:  stp     x23, x22, [sp
-; CHECK:  stp     x20, x19, [sp
-; CHECK:  stp     x29, x30, [sp
 ; Store argument registers.
-; CHECK:  mov      x20, x1
-; CHECK:  mov      x22, x2
-; CHECK:  mov      x23, x3
-; CHECK:  mov      x24, x4
-; CHECK:  mov      x25, x5
-; CHECK:  mov      x26, x6
-; CHECK:  mov      x27, x7
-; CHECK:  mov      x28, x21
 ; Setup call.
-; CHECK:  mov     w0, #1
-; CHECK:  mov     w1, #2
-; CHECK:  mov     w2, #3
-; CHECK:  mov     w3, #4
-; CHECK:  mov     w4, #5
-; CHECK:  mov     w5, #6
-; CHECK:  mov     w6, #7
-; CHECK:  mov     w7, #8
-; CHECK:  str     xzr, [sp]
-; CHECK:  mov      x21, xzr
-; CHECK:  bl      _params_in_reg2
 ; Restore original arguments for next call.
-; CHECK:  ldr      x0, [sp
-; CHECK:  mov      x1, x20
-; CHECK:  mov      x2, x22
-; CHECK:  mov      x3, x23
-; CHECK:  mov      x4, x24
-; CHECK:  mov      x5, x25
-; CHECK:  mov      x6, x26
-; CHECK:  mov      x7, x27
 ; Restore original swiftself argument and swifterror %err.
-; CHECK:  mov      x21, x28
-; CHECK:  bl      _params_in_reg2
 ; Restore callee save registers but don't clobber swifterror x21.
-; CHECK-NOT: x21
-; CHECK:  ldp     x29, x30, [sp
-; CHECK-NOT: x21
-; CHECK:  ldp     x20, x19, [sp
-; CHECK-NOT: x21
-; CHECK:  ldp     x23, x22, [sp
-; CHECK-NOT: x21
-; CHECK:  ldp     x25, x24, [sp
-; CHECK-NOT: x21
-; CHECK:  ldp     x27, x26, [sp
-; CHECK-NOT: x21
-; CHECK:  ldr     x28, [sp
-; CHECK-NOT: x21
-; CHECK:  ret
 define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8*, %swift_error** nocapture swifterror %err) {
+; CHECK-LABEL: params_in_reg:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    sub sp, sp, #112 ; =112
+; CHECK-NEXT:    stp x28, x0, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x27, x26, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x25, x24, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x23, x22, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #96] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #96 ; =96
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -24
+; CHECK-NEXT:    .cfi_offset w20, -32
+; CHECK-NEXT:    .cfi_offset w22, -40
+; CHECK-NEXT:    .cfi_offset w23, -48
+; CHECK-NEXT:    .cfi_offset w24, -56
+; CHECK-NEXT:    .cfi_offset w25, -64
+; CHECK-NEXT:    .cfi_offset w26, -72
+; CHECK-NEXT:    .cfi_offset w27, -80
+; CHECK-NEXT:    .cfi_offset w28, -96
+; CHECK-NEXT:    mov x20, x1
+; CHECK-NEXT:    mov x22, x2
+; CHECK-NEXT:    mov x23, x3
+; CHECK-NEXT:    mov x24, x4
+; CHECK-NEXT:    mov x25, x5
+; CHECK-NEXT:    mov x26, x6
+; CHECK-NEXT:    mov x27, x7
+; CHECK-NEXT:    ldr x19, [x29, #16]
+; CHECK-NEXT:    mov x28, x21
+; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:    mov w1, #2
+; CHECK-NEXT:    mov w2, #3
+; CHECK-NEXT:    mov w3, #4
+; CHECK-NEXT:    mov w4, #5
+; CHECK-NEXT:    mov w5, #6
+; CHECK-NEXT:    mov w6, #7
+; CHECK-NEXT:    mov w7, #8
+; CHECK-NEXT:    str xzr, [sp]
+; CHECK-NEXT:    mov x21, xzr
+; CHECK-NEXT:    bl _params_in_reg2
+; CHECK-NEXT:    str x19, [sp]
+; CHECK-NEXT:    ldr x0, [sp, #24] ; 8-byte Folded Reload
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    mov x2, x22
+; CHECK-NEXT:    mov x3, x23
+; CHECK-NEXT:    mov x4, x24
+; CHECK-NEXT:    mov x5, x25
+; CHECK-NEXT:    mov x6, x26
+; CHECK-NEXT:    mov x7, x27
+; CHECK-NEXT:    mov x21, x28
+; CHECK-NEXT:    bl _params_in_reg2
+; CHECK-NEXT:    ldp x29, x30, [sp, #96] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x23, x22, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x25, x24, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x27, x26, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldr x28, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #112 ; =112
+; CHECK-NEXT:    ret
   %error_ptr_ref = alloca swifterror %swift_error*, align 8
   store %swift_error* null, %swift_error** %error_ptr_ref
   call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i8*  null, %swift_error** nocapture swifterror %error_ptr_ref)
@@ -378,89 +581,108 @@ define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8*,
 }
 declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* , %swift_error** nocapture swifterror %err)
 
-; CHECK-LABEL: params_and_return_in_reg
 ; Store callee saved registers.
-; CHECK:  stp     x28, x0, [sp, #16
-; CHECK:  stp     x27, x26, [sp
-; CHECK:  stp     x25, x24, [sp
-; CHECK:  stp     x23, x22, [sp
-; CHECK:  stp     x20, x19, [sp
-; CHECK:  stp     x29, x30, [sp
 ; Save original arguments.
-; CHECK:  mov      x20, x1
-; CHECK:  mov      x22, x2
-; CHECK:  mov      x23, x3
-; CHECK:  mov      x24, x4
-; CHECK:  mov      x25, x5
-; CHECK:  mov      x26, x6
-; CHECK:  mov      x27, x7
-; CHECK:  mov      x28, x21
 ; Setup call arguments.
-; CHECK:  mov     w0, #1
-; CHECK:  mov     w1, #2
-; CHECK:  mov     w2, #3
-; CHECK:  mov     w3, #4
-; CHECK:  mov     w4, #5
-; CHECK:  mov     w5, #6
-; CHECK:  mov     w6, #7
-; CHECK:  mov     w7, #8
-; CHECK:  mov      x21, xzr
-; CHECK:  bl      _params_in_reg2
 ; Store swifterror %error_ptr_ref.
-; CHECK:  stp     {{x[0-9]+}}, x21, [sp]
 ; Setup call arguments from original arguments.
-; CHECK:  ldr      x0, [sp, #24
-; CHECK:  mov      x1, x20
-; CHECK:  mov      x2, x22
-; CHECK:  mov      x3, x23
-; CHECK:  mov      x4, x24
-; CHECK:  mov      x5, x25
-; CHECK:  mov      x6, x26
-; CHECK:  mov      x7, x27
-; CHECK:  mov      x21, x28
-; CHECK:  bl      _params_and_return_in_reg2
 ; Store return values.
-; CHECK:  mov      x20, x0
-; CHECK:  mov      x22, x1
-; CHECK:  mov      x23, x2
-; CHECK:  mov      x24, x3
-; CHECK:  mov      x25, x4
-; CHECK:  mov      x26, x5
-; CHECK:  mov      x27, x6
-; CHECK:  mov      x28, x7
 ; Save swifterror %err.
-; CHECK:  mov      x19, x21
 ; Setup call.
-; CHECK:  mov     w0, #1
-; CHECK:  mov     w1, #2
-; CHECK:  mov     w2, #3
-; CHECK:  mov     w3, #4
-; CHECK:  mov     w4, #5
-; CHECK:  mov     w5, #6
-; CHECK:  mov     w6, #7
-; CHECK:  mov     w7, #8
 ; ... setup call with swifterror %error_ptr_ref.
-; CHECK:  ldr     x21, [sp, #8]
-; CHECK:  bl      _params_in_reg2
 ; Restore return values for return from this function.
-; CHECK:  mov      x0, x20
-; CHECK:  mov      x1, x22
-; CHECK:  mov      x2, x23
-; CHECK:  mov      x3, x24
-; CHECK:  mov      x4, x25
-; CHECK:  mov      x5, x26
-; CHECK:  mov      x6, x27
-; CHECK:  mov      x7, x28
-; CHECK:  mov      x21, x19
 ; Restore callee save registers.
-; CHECK:  ldp     x29, x30, [sp
-; CHECK:  ldp     x20, x19, [sp
-; CHECK:  ldp     x23, x22, [sp
-; CHECK:  ldp     x25, x24, [sp
-; CHECK:  ldp     x27, x26, [sp
-; CHECK:  ldr     x28, [sp
-; CHECK:  ret
 define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* , %swift_error** nocapture swifterror %err) {
+; CHECK-LABEL: params_and_return_in_reg:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    sub sp, sp, #112 ; =112
+; CHECK-NEXT:    stp x28, x0, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x27, x26, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x25, x24, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x23, x22, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #96] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #96 ; =96
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -24
+; CHECK-NEXT:    .cfi_offset w20, -32
+; CHECK-NEXT:    .cfi_offset w22, -40
+; CHECK-NEXT:    .cfi_offset w23, -48
+; CHECK-NEXT:    .cfi_offset w24, -56
+; CHECK-NEXT:    .cfi_offset w25, -64
+; CHECK-NEXT:    .cfi_offset w26, -72
+; CHECK-NEXT:    .cfi_offset w27, -80
+; CHECK-NEXT:    .cfi_offset w28, -96
+; CHECK-NEXT:    mov x20, x1
+; CHECK-NEXT:    mov x22, x2
+; CHECK-NEXT:    mov x23, x3
+; CHECK-NEXT:    mov x24, x4
+; CHECK-NEXT:    mov x25, x5
+; CHECK-NEXT:    mov x26, x6
+; CHECK-NEXT:    mov x27, x7
+; CHECK-NEXT:    ldr x19, [x29, #16]
+; CHECK-NEXT:    mov x28, x21
+; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:    mov w1, #2
+; CHECK-NEXT:    mov w2, #3
+; CHECK-NEXT:    mov w3, #4
+; CHECK-NEXT:    mov w4, #5
+; CHECK-NEXT:    mov w5, #6
+; CHECK-NEXT:    mov w6, #7
+; CHECK-NEXT:    mov w7, #8
+; CHECK-NEXT:    str xzr, [sp]
+; CHECK-NEXT:    mov x21, xzr
+; CHECK-NEXT:    bl _params_in_reg2
+; CHECK-NEXT:    stp x19, x21, [sp] ; 8-byte Folded Spill
+; CHECK-NEXT:    ldr x0, [sp, #24] ; 8-byte Folded Reload
+; CHECK-NEXT:    mov x1, x20
+; CHECK-NEXT:    mov x2, x22
+; CHECK-NEXT:    mov x3, x23
+; CHECK-NEXT:    mov x4, x24
+; CHECK-NEXT:    mov x5, x25
+; CHECK-NEXT:    mov x6, x26
+; CHECK-NEXT:    mov x7, x27
+; CHECK-NEXT:    mov x21, x28
+; CHECK-NEXT:    bl _params_and_return_in_reg2
+; CHECK-NEXT:    mov x20, x0
+; CHECK-NEXT:    mov x22, x1
+; CHECK-NEXT:    mov x23, x2
+; CHECK-NEXT:    mov x24, x3
+; CHECK-NEXT:    mov x25, x4
+; CHECK-NEXT:    mov x26, x5
+; CHECK-NEXT:    mov x27, x6
+; CHECK-NEXT:    mov x28, x7
+; CHECK-NEXT:    mov x19, x21
+; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:    mov w1, #2
+; CHECK-NEXT:    mov w2, #3
+; CHECK-NEXT:    mov w3, #4
+; CHECK-NEXT:    mov w4, #5
+; CHECK-NEXT:    mov w5, #6
+; CHECK-NEXT:    mov w6, #7
+; CHECK-NEXT:    mov w7, #8
+; CHECK-NEXT:    str xzr, [sp]
+; CHECK-NEXT:    ldr x21, [sp, #8] ; 8-byte Folded Reload
+; CHECK-NEXT:    bl _params_in_reg2
+; CHECK-NEXT:    mov x0, x20
+; CHECK-NEXT:    mov x1, x22
+; CHECK-NEXT:    mov x2, x23
+; CHECK-NEXT:    mov x3, x24
+; CHECK-NEXT:    mov x4, x25
+; CHECK-NEXT:    mov x5, x26
+; CHECK-NEXT:    mov x6, x27
+; CHECK-NEXT:    mov x7, x28
+; CHECK-NEXT:    mov x21, x19
+; CHECK-NEXT:    ldp x29, x30, [sp, #96] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x23, x22, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x25, x24, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x27, x26, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldr x28, [sp, #16] ; 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #112 ; =112
+; CHECK-NEXT:    ret
   %error_ptr_ref = alloca swifterror %swift_error*, align 8
   store %swift_error* null, %swift_error** %error_ptr_ref
   call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i8*  null, %swift_error** nocapture swifterror %error_ptr_ref)
@@ -475,32 +697,69 @@ declare void @acallee(i8*)
 
 ; Make sure we don't tail call if the caller returns a swifterror value. We
 ; would have to move into the swifterror register before the tail call.
-; CHECK: tailcall_from_swifterror:
-; CHECK-NOT: b _acallee
-; CHECK: bl _acallee
 
 define swiftcc void @tailcall_from_swifterror(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-LABEL: tailcall_from_swifterror:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #16 ; =16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -32
+; CHECK-NEXT:    mov x19, x21
+; CHECK-NEXT:    mov x0, xzr
+; CHECK-NEXT:    bl _acallee
+; CHECK-NEXT:    mov x21, x19
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   tail call void @acallee(i8* null)
   ret void
 }
 
-; CHECK: tailcall_from_swifterror2
-; CHECK-NOT: b _simple_fn
-; CHECK: bl _simple_fn
 declare void @simple_fn()
 define swiftcc void @tailcall_from_swifterror2(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-LABEL: tailcall_from_swifterror2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #16 ; =16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -32
+; CHECK-NEXT:    mov x19, x21
+; CHECK-NEXT:    bl _simple_fn
+; CHECK-NEXT:    mov x21, x19
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT:    ret
   tail call void @simple_fn()
   ret void
 }
 
 declare swiftcc void @foo2(%swift_error** swifterror)
-; CHECK-LABEL: testAssign
-; CHECK: mov      x21, xzr
-; CHECK: bl      _foo2
-; CHECK: mov      x0, x21
 
 define swiftcc %swift_error* @testAssign(i8* %error_ref) {
+; CHECK-LABEL: testAssign:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp x22, x21, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #16 ; =16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w21, -24
+; CHECK-NEXT:    .cfi_offset w22, -32
+; CHECK-NEXT:    mov x21, xzr
+; CHECK-NEXT:    bl _foo2
+; CHECK-NEXT:    mov x0, x21
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp], #32 ; 16-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %error_ptr = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr
@@ -515,16 +774,35 @@ a:
 ; foo takes a swifterror parameter. We should be able to see that even when
 ; it isn't explicitly on the call.
 define float @swifterror_param_not_on_call(i8* %error_ref) {
-; CHECK-LABEL: swifterror_param_not_on_call:
-; CHECK: mov [[ID:x[0-9]+]], x0
-; CHECK: bl {{.*}}foo
-; CHECK: mov x0, x21
-; CHECK: cbnz x21
 ; Access part of the error object and save it to error_ref
-; CHECK: ldrb [[CODE:w[0-9]+]], [x0, #8]
-; CHECK: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK: bl {{.*}}free
-
+; CHECK-LABEL: swifterror_param_not_on_call:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #32 ; =32
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -24
+; CHECK-NEXT:    .cfi_offset w20, -32
+; CHECK-NEXT:    .cfi_offset w21, -40
+; CHECK-NEXT:    .cfi_offset w22, -48
+; CHECK-NEXT:    mov x19, x0
+; CHECK-NEXT:    mov x21, xzr
+; CHECK-NEXT:    bl _foo
+; CHECK-NEXT:    mov x0, x21
+; CHECK-NEXT:    cbnz x21, LBB16_2
+; CHECK-NEXT:  ; %bb.1: ; %cont
+; CHECK-NEXT:    ldrb w8, [x0, #8]
+; CHECK-NEXT:    strb w8, [x19]
+; CHECK-NEXT:  LBB16_2: ; %handler
+; CHECK-NEXT:    bl _free
+; CHECK-NEXT:    fmov s0, #1.00000000
+; CHECK-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp], #48 ; 16-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr_ref
@@ -546,17 +824,39 @@ handler:
 ; foo_sret takes an sret parameter and a swifterror parameter. We should be
 ; able to see that, even if it's not explicitly on the call.
 define float @swifterror_param_not_on_call2(i8* %error_ref) {
-; CHECK-LABEL: swifterror_param_not_on_call2:
-; CHECK: mov [[ID:x[0-9]+]], x0
-; CHECK: mov [[ZERO:x[0-9]+]], xzr
-; CHECK: bl {{.*}}foo_sret
-; CHECK: mov x0, x21
-; CHECK: cbnz x21
 ; Access part of the error object and save it to error_ref
-; CHECK: ldrb [[CODE:w[0-9]+]], [x0, #8]
-; CHECK: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK: bl {{.*}}free
-
+; CHECK-LABEL: swifterror_param_not_on_call2:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    sub sp, sp, #80 ; =80
+; CHECK-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #64 ; =64
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -24
+; CHECK-NEXT:    .cfi_offset w20, -32
+; CHECK-NEXT:    .cfi_offset w21, -40
+; CHECK-NEXT:    .cfi_offset w22, -48
+; CHECK-NEXT:    mov x19, x0
+; CHECK-NEXT:    add x8, sp, #8 ; =8
+; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:    mov x21, xzr
+; CHECK-NEXT:    bl _foo_sret
+; CHECK-NEXT:    mov x0, x21
+; CHECK-NEXT:    cbnz x21, LBB17_2
+; CHECK-NEXT:  ; %bb.1: ; %cont
+; CHECK-NEXT:    ldrb w8, [x0, #8]
+; CHECK-NEXT:    strb w8, [x19]
+; CHECK-NEXT:  LBB17_2: ; %handler
+; CHECK-NEXT:    bl _free
+; CHECK-NEXT:    fmov s0, #1.00000000
+; CHECK-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #80 ; =80
+; CHECK-NEXT:    ret
 entry:
   %s = alloca %struct.S, align 8
   %error_ptr_ref = alloca swifterror %swift_error*
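
The exhaustive CHECK-NEXT bodies above come straight from re-running
llvm/utils/update_llc_test_checks.py on the test file; they replace the
hand-written, loosely ordered CHECK/CHECK-NOT patterns and can be refreshed
the same way after future codegen changes (the NOTE line added at the top of
the file records this). To emit them, the script first has to carve each
function body out of llc's Darwin-flavoured output, roughly like the
following toy sketch (assumption: shaped after, but much simpler than, the
Darwin regexes in UpdateTestChecks/asm.py):

    import re

    # Darwin asm uses '_'-prefixed symbols, ';' comments, and an
    # "; -- End function" trailer instead of an ELF .Lfunc_end label.
    DARWIN_FN_RE = re.compile(
        r'^_(?P<func>[^:]+):.*?\n(?P<body>.*?)^\s*;\s*-- End function',
        flags=(re.M | re.S))

    asm = ('_foo:                        ; @foo\n'
           '; %bb.0:                     ; %entry\n'
           '\tmov w0, #16\n'
           '\tbl _malloc\n'
           '\tret\n'
           '                             ; -- End function\n')

    m = DARWIN_FN_RE.search(asm)
    print(m.group('func'))  # -> foo
    print(m.group('body'))  # -> the lines the CHECK-NEXT assertions are built from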

diff --git a/llvm/test/CodeGen/AArch64/branch-relax-asm.ll b/llvm/test/CodeGen/AArch64/branch-relax-asm.ll
index 89d0529c96674..132f54b623e2b 100644
--- a/llvm/test/CodeGen/AArch64/branch-relax-asm.ll
+++ b/llvm/test/CodeGen/AArch64/branch-relax-asm.ll
@@ -1,27 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-apple-ios7.0 -disable-block-placement -aarch64-tbz-offset-bits=4 -o - %s | FileCheck %s
 define i32 @test_asm_length(i32 %in) {
-; CHECK-LABEL: test_asm_length:
-
   ; It would be more natural to use just one "tbnz %false" here, but if the
   ; number of instructions in the asm is counted reasonably, that block is out
   ; of the limited range we gave tbz. So branch relaxation has to invert the
   ; condition.
-; CHECK:     tbz w0, #0, [[TRUE:LBB[0-9]+_[0-9]+]]
-; CHECK:     b [[FALSE:LBB[0-9]+_[0-9]+]]
-
-; CHECK: [[TRUE]]:
-; CHECK:     mov w0, #4
-; CHECK:     nop
-; CHECK:     nop
-; CHECK:     nop
-; CHECK:     nop
-; CHECK:     nop
-; CHECK:     nop
-; CHECK:     ret
-
-; CHECK: [[FALSE]]:
-; CHECK:     ret
-
+; CHECK-LABEL: test_asm_length:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    tbz w0, #0, LBB0_1
+; CHECK-NEXT:    b LBB0_2
+; CHECK-NEXT:  LBB0_1: ; %true
+; CHECK-NEXT:    mov w0, #4
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB0_2: ; %false
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
   %val = and i32 %in, 1
   %tst = icmp eq i32 %val, 0
   br i1 %tst, label %true, label %false
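
One consequence visible in the next file: with fully autogenerated bodies,
FileCheck prefixes shared between RUN lines whose output diverges stop
pulling their weight. That is presumably why the swifterror.ll RUN lines
below drop the common CHECK-O0 prefix in favour of the per-target
CHECK-O0-AARCH64/CHECK-O0-ARM64_32 prefixes, and why the remaining prefix
pairs are folded into single --check-prefixes= options.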

diff --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll
index 4c208fa2732f9..6a68a668a16ac 100644
--- a/llvm/test/CodeGen/AArch64/swifterror.ll
+++ b/llvm/test/CodeGen/AArch64/swifterror.ll
@@ -1,7 +1,8 @@
-; RUN: llc -verify-machineinstrs -frame-pointer=all -enable-shrink-wrap=false < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-APPLE --check-prefix=CHECK-APPLE-AARCH64 %s
-; RUN: llc -verify-machineinstrs -frame-pointer=all -O0 -fast-isel < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-O0 --check-prefix=CHECK-O0-AARCH64 %s
-; RUN: llc -verify-machineinstrs -frame-pointer=all -enable-shrink-wrap=false < %s -mtriple=arm64_32-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-APPLE --check-prefix=CHECK-APPLE-ARM64_32 %s
-; RUN: llc -verify-machineinstrs -O0 -fast-isel < %s -mtriple=arm64_32-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-O0 --check-prefix=CHECK-O0-ARM64_32 %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -frame-pointer=all -enable-shrink-wrap=false < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefixes=CHECK-APPLE,CHECK-APPLE-AARCH64 %s
+; RUN: llc -verify-machineinstrs -frame-pointer=all -O0 -fast-isel < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefixes=CHECK-O0-AARCH64 %s
+; RUN: llc -verify-machineinstrs -frame-pointer=all -enable-shrink-wrap=false < %s -mtriple=arm64_32-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefixes=CHECK-APPLE,CHECK-APPLE-ARM64_32 %s
+; RUN: llc -verify-machineinstrs -O0 -fast-isel < %s -mtriple=arm64_32-apple-ios -disable-post-ra | FileCheck -allow-deprecated-dag-overlap --check-prefixes=CHECK-O0-ARM64_32 %s
 
 declare i8* @malloc(i64)
 declare void @free(i8*)
@@ -11,20 +12,52 @@ declare void @free(i8*)
 ; that takes a swifterror parameter and "caller" is the caller of "foo".
 define float @foo(%swift_error** swifterror %error_ptr_ref) {
 ; CHECK-APPLE-LABEL: foo:
-; CHECK-APPLE: mov w0, #16
-; CHECK-APPLE: malloc
-; CHECK-APPLE: mov [[ID:w[0-9]+]], #1
-; CHECK-APPLE: strb [[ID]], [x0, #8]
-; CHECK-APPLE: mov x21, x0
-; CHECK-APPLE-NOT: x21
-
-; CHECK-O0-LABEL: foo:
-; CHECK-O0: mov w{{.*}}, #16
-; CHECK-O0: malloc
-; CHECK-O0: mov x21, x0
-; CHECK-O0-NOT: x21
-; CHECK-O0: mov [[ID:w[0-9]+]], #1
-; CHECK-O0: strb [[ID]], [x0, #8]
+; CHECK-APPLE:       ; %bb.0: ; %entry
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov x29, sp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    mov w0, #16
+; CHECK-APPLE-NEXT:    bl _malloc
+; CHECK-APPLE-NEXT:    mov w8, #1
+; CHECK-APPLE-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-NEXT:    strb w8, [x0, #8]
+; CHECK-APPLE-NEXT:    mov x21, x0
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: foo:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x29, sp
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    mov w8, #16
+; CHECK-O0-AARCH64-NEXT:    mov w0, w8
+; CHECK-O0-AARCH64-NEXT:    bl _malloc
+; CHECK-O0-AARCH64-NEXT:    mov x21, x0
+; CHECK-O0-AARCH64-NEXT:    mov w8, #1
+; CHECK-O0-AARCH64-NEXT:    strb w8, [x0, #8]
+; CHECK-O0-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: foo:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #-16]! ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #16
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w8
+; CHECK-O0-ARM64_32-NEXT:    bl _malloc
+; CHECK-O0-ARM64_32-NEXT:    mov x21, x0
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #1
+; CHECK-O0-ARM64_32-NEXT:    strb w8, [x0, #8]
+; CHECK-O0-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp], #16 ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   %call = call i8* @malloc(i64 16)
   %call.0 = bitcast i8* %call to %swift_error*
@@ -36,24 +69,135 @@ entry:
 
 ; "caller" calls "foo" that takes a swifterror parameter.
 define float @caller(i8* %error_ref) {
-; CHECK-APPLE-LABEL: caller:
-; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
-; CHECK-APPLE: mov x21, xzr
-; CHECK-APPLE: bl {{.*}}foo
-; CHECK-APPLE: mov x0, x21
-; CHECK-APPLE-AARCH64: cbnz x21
-; CHECK-APPLE-ARM64_32: cbnz w0
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8]
-; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: bl {{.*}}free
-
-; CHECK-O0-LABEL: caller:
-; CHECK-O0: mov x21
-; CHECK-O0: bl {{.*}}foo
-; CHECK-O0: mov [[ID:x[0-9]+]], x21
-; CHECK-O0-AARCH64: cbnz x21
-; CHECK-O0-ARM64_32: cmp x21, #0
+; CHECK-APPLE-AARCH64-LABEL: caller:
+; CHECK-APPLE-AARCH64:       ; %bb.0: ; %entry
+; CHECK-APPLE-AARCH64-NEXT:    sub sp, sp, #64 ; =64
+; CHECK-APPLE-AARCH64-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x20, x19, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    add x29, sp, #48 ; =48
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w21, -40
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w22, -48
+; CHECK-APPLE-AARCH64-NEXT:    mov x19, x0
+; CHECK-APPLE-AARCH64-NEXT:    mov x21, xzr
+; CHECK-APPLE-AARCH64-NEXT:    bl _foo
+; CHECK-APPLE-AARCH64-NEXT:    mov x0, x21
+; CHECK-APPLE-AARCH64-NEXT:    cbnz x21, LBB1_2
+; CHECK-APPLE-AARCH64-NEXT:  ; %bb.1: ; %cont
+; CHECK-APPLE-AARCH64-NEXT:    ldrb w8, [x0, #8]
+; CHECK-APPLE-AARCH64-NEXT:    strb w8, [x19]
+; CHECK-APPLE-AARCH64-NEXT:  LBB1_2: ; %handler
+; CHECK-APPLE-AARCH64-NEXT:    bl _free
+; CHECK-APPLE-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-AARCH64-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp x20, x19, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp x22, x21, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    add sp, sp, #64 ; =64
+; CHECK-APPLE-AARCH64-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: caller:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #64 ; =64
+; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #48 ; =48
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-AARCH64-NEXT:    ; implicit-def: $x1
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x21, xzr
+; CHECK-O0-AARCH64-NEXT:    bl _foo
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x0, x21
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    cbnz x21, LBB1_2
+; CHECK-O0-AARCH64-NEXT:  ; %bb.1: ; %cont
+; CHECK-O0-AARCH64-NEXT:    ldr x9, [sp] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x8, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldrb w8, [x8, #8]
+; CHECK-O0-AARCH64-NEXT:    strb w8, [x9]
+; CHECK-O0-AARCH64-NEXT:  LBB1_2: ; %handler
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    bl _free
+; CHECK-O0-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #64 ; =64
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-APPLE-ARM64_32-LABEL: caller:
+; CHECK-APPLE-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-APPLE-ARM64_32-NEXT:    sub sp, sp, #64 ; =64
+; CHECK-APPLE-ARM64_32-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x20, x19, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    add x29, sp, #48 ; =48
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w21, -40
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w22, -48
+; CHECK-APPLE-ARM64_32-NEXT:    mov x19, x0
+; CHECK-APPLE-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-APPLE-ARM64_32-NEXT:    bl _foo
+; CHECK-APPLE-ARM64_32-NEXT:    mov x0, x21
+; CHECK-APPLE-ARM64_32-NEXT:    cbnz w0, LBB1_2
+; CHECK-APPLE-ARM64_32-NEXT:  ; %bb.1: ; %cont
+; CHECK-APPLE-ARM64_32-NEXT:    ldrb w8, [x0, #8]
+; CHECK-APPLE-ARM64_32-NEXT:    strb w8, [x19]
+; CHECK-APPLE-ARM64_32-NEXT:  LBB1_2: ; %handler
+; CHECK-APPLE-ARM64_32-NEXT:    bl _free
+; CHECK-APPLE-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x20, x19, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x22, x21, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    add sp, sp, #64 ; =64
+; CHECK-APPLE-ARM64_32-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: caller:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #64 ; =64
+; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x1
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-O0-ARM64_32-NEXT:    bl _foo
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x0, x21
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    cmp x21, #0 ; =0
+; CHECK-O0-ARM64_32-NEXT:    b.ne LBB1_2
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.1: ; %cont
+; CHECK-O0-ARM64_32-NEXT:    ldr x9, [sp] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x8, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldrb w8, [x8, #8]
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $w0 killed $w8
+; CHECK-O0-ARM64_32-NEXT:    strb w8, [x9]
+; CHECK-O0-ARM64_32-NEXT:  LBB1_2: ; %handler
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    bl _free
+; CHECK-O0-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #64 ; =64
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr_ref
@@ -74,27 +218,175 @@ handler:
 
 ; "caller2" is the caller of "foo", it calls "foo" inside a loop.
 define float @caller2(i8* %error_ref) {
-; CHECK-APPLE-LABEL: caller2:
-; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
-; CHECK-APPLE: fmov [[CMP:s[0-9]+]], #1.0
-; CHECK-APPLE: mov x21, xzr
-; CHECK-APPLE: bl {{.*}}foo
-; CHECK-APPLE-AARCH64: cbnz x21
-; CHECK-APPLE-ARM64_32: cbnz w21
-; CHECK-APPLE: fcmp s0, [[CMP]]
-; CHECK-APPLE: b.le
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x21, #8]
-; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: mov x0, x21
-; CHECK-APPLE: bl {{.*}}free
-
-; CHECK-O0-LABEL: caller2:
-; CHECK-O0: mov x21
-; CHECK-O0: bl {{.*}}foo
-; CHECK-O0: mov [[ID:x[0-9]+]], x21
-; CHECK-O0-AARCH64: cbnz x21
-; CHECK-O0-ARM64_32: cmp x21, #0
+; CHECK-APPLE-AARCH64-LABEL: caller2:
+; CHECK-APPLE-AARCH64:       ; %bb.0: ; %entry
+; CHECK-APPLE-AARCH64-NEXT:    sub sp, sp, #80 ; =80
+; CHECK-APPLE-AARCH64-NEXT:    stp d9, d8, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    add x29, sp, #64 ; =64
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w21, -40
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w22, -48
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset b8, -56
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset b9, -64
+; CHECK-APPLE-AARCH64-NEXT:    mov x19, x0
+; CHECK-APPLE-AARCH64-NEXT:    fmov s8, #1.00000000
+; CHECK-APPLE-AARCH64-NEXT:  LBB2_1: ; %bb_loop
+; CHECK-APPLE-AARCH64-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-APPLE-AARCH64-NEXT:    mov x21, xzr
+; CHECK-APPLE-AARCH64-NEXT:    bl _foo
+; CHECK-APPLE-AARCH64-NEXT:    cbnz x21, LBB2_4
+; CHECK-APPLE-AARCH64-NEXT:  ; %bb.2: ; %cont
+; CHECK-APPLE-AARCH64-NEXT:    ; in Loop: Header=BB2_1 Depth=1
+; CHECK-APPLE-AARCH64-NEXT:    fcmp s0, s8
+; CHECK-APPLE-AARCH64-NEXT:    b.le LBB2_1
+; CHECK-APPLE-AARCH64-NEXT:  ; %bb.3: ; %bb_end
+; CHECK-APPLE-AARCH64-NEXT:    ldrb w8, [x21, #8]
+; CHECK-APPLE-AARCH64-NEXT:    strb w8, [x19]
+; CHECK-APPLE-AARCH64-NEXT:  LBB2_4: ; %handler
+; CHECK-APPLE-AARCH64-NEXT:    mov x0, x21
+; CHECK-APPLE-AARCH64-NEXT:    bl _free
+; CHECK-APPLE-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-AARCH64-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp d9, d8, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    add sp, sp, #80 ; =80
+; CHECK-APPLE-AARCH64-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: caller2:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #80 ; =80
+; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #48] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #64 ; =64
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-AARCH64-NEXT:    ; implicit-def: $x1
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:  LBB2_1: ; %bb_loop
+; CHECK-O0-AARCH64-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-O0-AARCH64-NEXT:    mov x21, xzr
+; CHECK-O0-AARCH64-NEXT:    bl _foo
+; CHECK-O0-AARCH64-NEXT:    str s0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x0, x21
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    cbnz x21, LBB2_4
+; CHECK-O0-AARCH64-NEXT:  ; %bb.2: ; %cont
+; CHECK-O0-AARCH64-NEXT:    ; in Loop: Header=BB2_1 Depth=1
+; CHECK-O0-AARCH64-NEXT:    ldr s0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    fmov s1, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    fcmp s0, s1
+; CHECK-O0-AARCH64-NEXT:    b.le LBB2_1
+; CHECK-O0-AARCH64-NEXT:  ; %bb.3: ; %bb_end
+; CHECK-O0-AARCH64-NEXT:    ldr x9, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldrb w8, [x8, #8]
+; CHECK-O0-AARCH64-NEXT:    strb w8, [x9]
+; CHECK-O0-AARCH64-NEXT:  LBB2_4: ; %handler
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    bl _free
+; CHECK-O0-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x22, x21, [sp, #48] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #80 ; =80
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-APPLE-ARM64_32-LABEL: caller2:
+; CHECK-APPLE-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-APPLE-ARM64_32-NEXT:    sub sp, sp, #80 ; =80
+; CHECK-APPLE-ARM64_32-NEXT:    stp d9, d8, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    add x29, sp, #64 ; =64
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w21, -40
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w22, -48
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset b8, -56
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset b9, -64
+; CHECK-APPLE-ARM64_32-NEXT:    mov x19, x0
+; CHECK-APPLE-ARM64_32-NEXT:    fmov s8, #1.00000000
+; CHECK-APPLE-ARM64_32-NEXT:  LBB2_1: ; %bb_loop
+; CHECK-APPLE-ARM64_32-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-APPLE-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-APPLE-ARM64_32-NEXT:    bl _foo
+; CHECK-APPLE-ARM64_32-NEXT:    cbnz w21, LBB2_4
+; CHECK-APPLE-ARM64_32-NEXT:  ; %bb.2: ; %cont
+; CHECK-APPLE-ARM64_32-NEXT:    ; in Loop: Header=BB2_1 Depth=1
+; CHECK-APPLE-ARM64_32-NEXT:    fcmp s0, s8
+; CHECK-APPLE-ARM64_32-NEXT:    b.le LBB2_1
+; CHECK-APPLE-ARM64_32-NEXT:  ; %bb.3: ; %bb_end
+; CHECK-APPLE-ARM64_32-NEXT:    ldrb w8, [x21, #8]
+; CHECK-APPLE-ARM64_32-NEXT:    strb w8, [x19]
+; CHECK-APPLE-ARM64_32-NEXT:  LBB2_4: ; %handler
+; CHECK-APPLE-ARM64_32-NEXT:    mov x0, x21
+; CHECK-APPLE-ARM64_32-NEXT:    bl _free
+; CHECK-APPLE-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp d9, d8, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    add sp, sp, #80 ; =80
+; CHECK-APPLE-ARM64_32-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: caller2:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #80 ; =80
+; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #48] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x1
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:  LBB2_1: ; %bb_loop
+; CHECK-O0-ARM64_32-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-O0-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-O0-ARM64_32-NEXT:    bl _foo
+; CHECK-O0-ARM64_32-NEXT:    str s0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x0, x21
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    cmp x21, #0 ; =0
+; CHECK-O0-ARM64_32-NEXT:    b.ne LBB2_4
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.2: ; %cont
+; CHECK-O0-ARM64_32-NEXT:    ; in Loop: Header=BB2_1 Depth=1
+; CHECK-O0-ARM64_32-NEXT:    ldr s0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    fmov s1, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    fcmp s0, s1
+; CHECK-O0-ARM64_32-NEXT:    b.le LBB2_1
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.3: ; %bb_end
+; CHECK-O0-ARM64_32-NEXT:    ldr x9, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldrb w8, [x8, #8]
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $w0 killed $w8
+; CHECK-O0-ARM64_32-NEXT:    strb w8, [x9]
+; CHECK-O0-ARM64_32-NEXT:  LBB2_4: ; %handler
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    bl _free
+; CHECK-O0-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldp x22, x21, [sp, #48] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #80 ; =80
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   br label %bb_loop
@@ -121,29 +413,83 @@ handler:
 ; "foo_if" is a function that takes a swifterror parameter, it sets swifterror
 ; under a certain condition.
 define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
-; CHECK-APPLE-LABEL: foo_if:
-; CHECK-APPLE: cbz w0
-; CHECK-APPLE: mov w0, #16
-; CHECK-APPLE: malloc
-; CHECK-APPLE: mov [[ID:w[0-9]+]], #1
-; CHECK-APPLE: strb [[ID]], [x0, #8]
-; CHECK-APPLE: mov x21, x0
-; CHECK-APPLE-NOT: x21
-; CHECK-APPLE: ret
-
-; CHECK-O0-LABEL: foo_if:
 ; spill x21
-; CHECK-O0: str x21, [sp, [[SLOT:#[0-9]+]]]
-; CHECK-O0: cbz w0
-; CHECK-O0: mov w{{.*}}, #16
-; CHECK-O0: malloc
-; CHECK-O0: mov x21, x0
-; CHECK-O0: mov [[ID2:w[0-9]+]], #1
-; CHECK-O0: strb [[ID2]], [x0, #8]
-; CHECK-O0: ret
 ; reload from stack
-; CHECK-O0: ldr x21, [sp, [[SLOT]]]
-; CHECK-O0: ret
+; CHECK-APPLE-LABEL: foo_if:
+; CHECK-APPLE:       ; %bb.0: ; %entry
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov x29, sp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    cbz w0, LBB3_2
+; CHECK-APPLE-NEXT:  ; %bb.1: ; %gen_error
+; CHECK-APPLE-NEXT:    mov w0, #16
+; CHECK-APPLE-NEXT:    bl _malloc
+; CHECK-APPLE-NEXT:    mov w8, #1
+; CHECK-APPLE-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-NEXT:    strb w8, [x0, #8]
+; CHECK-APPLE-NEXT:    mov x21, x0
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+; CHECK-APPLE-NEXT:  LBB3_2: ; %normal
+; CHECK-APPLE-NEXT:    movi d0, #0000000000000000
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: foo_if:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #32 ; =32
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #16 ; =16
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    cbz w0, LBB3_2
+; CHECK-O0-AARCH64-NEXT:  ; %bb.1: ; %gen_error
+; CHECK-O0-AARCH64-NEXT:    mov w8, #16
+; CHECK-O0-AARCH64-NEXT:    mov w0, w8
+; CHECK-O0-AARCH64-NEXT:    bl _malloc
+; CHECK-O0-AARCH64-NEXT:    mov x21, x0
+; CHECK-O0-AARCH64-NEXT:    mov w8, #1
+; CHECK-O0-AARCH64-NEXT:    strb w8, [x0, #8]
+; CHECK-O0-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #32 ; =32
+; CHECK-O0-AARCH64-NEXT:    ret
+; CHECK-O0-AARCH64-NEXT:  LBB3_2: ; %normal
+; CHECK-O0-AARCH64-NEXT:    ldr x21, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    movi d0, #0000000000000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #32 ; =32
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: foo_if:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #32 ; =32
+; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    cbz w0, LBB3_2
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.1: ; %gen_error
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #16
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w8
+; CHECK-O0-ARM64_32-NEXT:    bl _malloc
+; CHECK-O0-ARM64_32-NEXT:    mov x21, x0
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #1
+; CHECK-O0-ARM64_32-NEXT:    strb w8, [x0, #8]
+; CHECK-O0-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #32 ; =32
+; CHECK-O0-ARM64_32-NEXT:    ret
+; CHECK-O0-ARM64_32-NEXT:  LBB3_2: ; %normal
+; CHECK-O0-ARM64_32-NEXT:    ldr x21, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    movi d0, #0000000000000000
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #32 ; =32
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   %cond = icmp ne i32 %cc, 0
   br i1 %cond, label %gen_error, label %normal
@@ -163,60 +509,136 @@ normal:
 ; "foo_loop" is a function that takes a swifterror parameter, it sets swifterror
 ; under a certain condition inside a loop.
 define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float %cc2) {
-; CHECK-APPLE-LABEL: foo_loop:
-; CHECK-APPLE: mov x0, x21
-; CHECK-APPLE: fcmp
-; CHECK-APPLE: b.gt
-; CHECK-APPLE: cbz
-; CHECK-APPLE: mov w0, #16
-; CHECK-APPLE: malloc
-; CHECK-APPLE: strb w{{.*}}, [x0, #8]
-; CHECK-APPLE: mov x21, x0
-; CHECK-APPLE: ret
-
-; CHECK-O0-AARCH64-LABEL: foo_loop:
 ; spill x21
-; CHECK-O0-AARCH64: stur x21, [x29, [[SLOT:#-[0-9]+]]]
-; CHECK-O0-AARCH64: b [[BB1:[A-Za-z0-9_]*]]
-; CHECK-O0-AARCH64: [[BB1]]:
-; CHECK-O0-AARCH64: ldur    x0, [x29, [[SLOT]]]
-; CHECK-O0-AARCH64: str     x0, [sp, [[SLOT2:#[0-9]+]]]
-; CHECK-O0-AARCH64: cbz {{.*}}, [[BB2:[A-Za-z0-9_]*]]
-; CHECK-O0-AARCH64: mov w{{.*}}, #16
-; CHECK-O0-AARCH64: malloc
-; CHECK-O0-AARCH64: mov [[ID:x[0-9]+]], x0
-; CHECK-O0-AARCH64: strb w{{.*}}, [{{.*}}[[ID]], #8]
 ; spill x0
-; CHECK-O0-AARCH64: str x0, [sp, [[SLOT2]]]
-; CHECK-O0-AARCH64:[[BB2]]:
-; CHECK-O0-AARCH64: ldr     x0, [sp, [[SLOT2]]]
-; CHECK-O0-AARCH64: fcmp
-; CHECK-O0-AARCH64: stur     x0, [x29, [[SLOT]]]
-; CHECK-O0-AARCH64: b.le [[BB1]]
 ; reload from stack
-; CHECK-O0-AARCH64: ldr x21, [sp]
-; CHECK-O0-AARCH64: ret
-
-; CHECK-O0-ARM64_32-LABEL: foo_loop:
 ; spill x21
-; CHECK-O0-ARM64_32: str x21, [sp, [[SLOT:#[0-9]+]]]
-; CHECK-O0-ARM64_32: b [[BB1:[A-Za-z0-9_]*]]
-; CHECK-O0-ARM64_32: [[BB1]]:
-; CHECK-O0-ARM64_32: ldr     x0, [sp, [[SLOT]]]
-; CHECK-O0-ARM64_32: str     x0, [sp, [[SLOT2:#[0-9]+]]]
-; CHECK-O0-ARM64_32: cbz {{.*}}, [[BB2:[A-Za-z0-9_]*]]
-; CHECK-O0-ARM64_32: mov w{{.*}}, #16
-; CHECK-O0-ARM64_32: malloc
-; CHECK-O0-ARM64_32: mov {{.*}}, x0
-; CHECK-O0-ARM64_32: strb w{{.*}},
-; CHECK-O0-ARM64_32:[[BB2]]:
-; CHECK-O0-ARM64_32: ldr     x0, [sp, [[SLOT2]]]
-; CHECK-O0-ARM64_32: str     x0, [sp[[OFFSET:.*]]]
-; CHECK-O0-ARM64_32: fcmp
-; CHECK-O0-ARM64_32: b.le [[BB1]]
 ; reload from stack
-; CHECK-O0-ARM64_32: ldr x21, [sp[[OFFSET]]]
-; CHECK-O0-ARM64_32: ret
+; CHECK-APPLE-LABEL: foo_loop:
+; CHECK-APPLE:       ; %bb.0: ; %entry
+; CHECK-APPLE-NEXT:    stp d9, d8, [sp, #-48]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x20, x19, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    add x29, sp, #32 ; =32
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-NEXT:    .cfi_offset b8, -40
+; CHECK-APPLE-NEXT:    .cfi_offset b9, -48
+; CHECK-APPLE-NEXT:    mov.16b v8, v0
+; CHECK-APPLE-NEXT:    mov w19, w0
+; CHECK-APPLE-NEXT:    mov x0, x21
+; CHECK-APPLE-NEXT:    mov w20, #1
+; CHECK-APPLE-NEXT:    fmov s9, #1.00000000
+; CHECK-APPLE-NEXT:    b LBB4_2
+; CHECK-APPLE-NEXT:  LBB4_1: ; %bb_cont
+; CHECK-APPLE-NEXT:    ; in Loop: Header=BB4_2 Depth=1
+; CHECK-APPLE-NEXT:    fcmp s8, s9
+; CHECK-APPLE-NEXT:    b.gt LBB4_4
+; CHECK-APPLE-NEXT:  LBB4_2: ; %bb_loop
+; CHECK-APPLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-APPLE-NEXT:    cbz w19, LBB4_1
+; CHECK-APPLE-NEXT:  ; %bb.3: ; %gen_error
+; CHECK-APPLE-NEXT:    ; in Loop: Header=BB4_2 Depth=1
+; CHECK-APPLE-NEXT:    mov w0, #16
+; CHECK-APPLE-NEXT:    bl _malloc
+; CHECK-APPLE-NEXT:    strb w20, [x0, #8]
+; CHECK-APPLE-NEXT:    b LBB4_1
+; CHECK-APPLE-NEXT:  LBB4_4: ; %bb_end
+; CHECK-APPLE-NEXT:    movi d0, #0000000000000000
+; CHECK-APPLE-NEXT:    mov x21, x0
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp d9, d8, [sp], #48 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: foo_loop:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #48 ; =48
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #32 ; =32
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    str s0, [sp, #16] ; 4-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stur w0, [x29, #-12] ; 4-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stur x21, [x29, #-8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    b LBB4_1
+; CHECK-O0-AARCH64-NEXT:  LBB4_1: ; %bb_loop
+; CHECK-O0-AARCH64-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-O0-AARCH64-NEXT:    ldur w8, [x29, #-12] ; 4-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x0, [x29, #-8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    cbz w8, LBB4_3
+; CHECK-O0-AARCH64-NEXT:  ; %bb.2: ; %gen_error
+; CHECK-O0-AARCH64-NEXT:    ; in Loop: Header=BB4_1 Depth=1
+; CHECK-O0-AARCH64-NEXT:    mov w8, #16
+; CHECK-O0-AARCH64-NEXT:    mov w0, w8
+; CHECK-O0-AARCH64-NEXT:    bl _malloc
+; CHECK-O0-AARCH64-NEXT:    mov x9, x0
+; CHECK-O0-AARCH64-NEXT:    mov w8, #1
+; CHECK-O0-AARCH64-NEXT:    strb w8, [x9, #8]
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:  LBB4_3: ; %bb_cont
+; CHECK-O0-AARCH64-NEXT:    ; in Loop: Header=BB4_1 Depth=1
+; CHECK-O0-AARCH64-NEXT:    ldr s0, [sp, #16] ; 4-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    fmov s1, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    fcmp s0, s1
+; CHECK-O0-AARCH64-NEXT:    stur x0, [x29, #-8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    b.le LBB4_1
+; CHECK-O0-AARCH64-NEXT:  ; %bb.4: ; %bb_end
+; CHECK-O0-AARCH64-NEXT:    ldr x21, [sp] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    movi d0, #0000000000000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #48 ; =48
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: foo_loop:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #48 ; =48
+; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
+; CHECK-O0-ARM64_32-NEXT:    str s0, [sp, #16] ; 4-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str w0, [sp, #20] ; 4-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    b LBB4_1
+; CHECK-O0-ARM64_32-NEXT:  LBB4_1: ; %bb_loop
+; CHECK-O0-ARM64_32-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-O0-ARM64_32-NEXT:    ldr w8, [sp, #20] ; 4-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    cbz w8, LBB4_3
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.2: ; %gen_error
+; CHECK-O0-ARM64_32-NEXT:    ; in Loop: Header=BB4_1 Depth=1
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #16
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w8
+; CHECK-O0-ARM64_32-NEXT:    bl _malloc
+; CHECK-O0-ARM64_32-NEXT:    mov x9, x0
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $x0 killed $x9
+; CHECK-O0-ARM64_32-NEXT:    mov x0, x9
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #1
+; CHECK-O0-ARM64_32-NEXT:    strb w8, [x9, #8]
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:  LBB4_3: ; %bb_cont
+; CHECK-O0-ARM64_32-NEXT:    ; in Loop: Header=BB4_1 Depth=1
+; CHECK-O0-ARM64_32-NEXT:    ldr s0, [sp, #16] ; 4-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    fmov s1, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    fcmp s0, s1
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    b.le LBB4_1
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.4: ; %bb_end
+; CHECK-O0-ARM64_32-NEXT:    ldr x21, [sp] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    movi d0, #0000000000000000
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #48 ; =48
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   br label %bb_loop
 
@@ -244,28 +666,75 @@ bb_end:
 ; "foo_sret" is a function that takes a swifterror parameter, it also has a sret
 ; parameter.
 define void @foo_sret(%struct.S* sret(%struct.S) %agg.result, i32 %val1, %swift_error** swifterror %error_ptr_ref) {
-; CHECK-APPLE-LABEL: foo_sret:
-; CHECK-APPLE: mov [[SRET:x[0-9]+]], x8
-; CHECK-APPLE: mov w0, #16
-; CHECK-APPLE: malloc
-; CHECK-APPLE: mov [[ID:w[0-9]+]], #1
-; CHECK-APPLE: strb [[ID]], [x0, #8]
-; CHECK-APPLE: str w{{.*}}, [{{.*}}[[SRET]], #4]
-; CHECK-APPLE: mov x21, x0
-; CHECK-APPLE-NOT: x21
-
-; CHECK-O0-LABEL: foo_sret:
 ; spill x8
-; CHECK-O0-DAG: str x8
-; CHECK-O0: mov w{{.*}}, #16
-; CHECK-O0: malloc
-; CHECK-O0: mov	x10, x0
-; CHECK-O0: mov	x21, x10
-; CHECK-O0: mov [[ID:w[0-9]+]], #1
-; CHECK-O0: strb [[ID]], [x10, #8]
 ; reload from stack
-; CHECK-O0: str w{{.*}}, [x8, #4]
-; CHECK-O0-NOT: x21
+; CHECK-APPLE-LABEL: foo_sret:
+; CHECK-APPLE:       ; %bb.0: ; %entry
+; CHECK-APPLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    add x29, sp, #16 ; =16
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-NEXT:    mov w19, w0
+; CHECK-APPLE-NEXT:    mov x20, x8
+; CHECK-APPLE-NEXT:    mov w0, #16
+; CHECK-APPLE-NEXT:    bl _malloc
+; CHECK-APPLE-NEXT:    mov w8, #1
+; CHECK-APPLE-NEXT:    strb w8, [x0, #8]
+; CHECK-APPLE-NEXT:    str w19, [x20, #4]
+; CHECK-APPLE-NEXT:    mov x21, x0
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: foo_sret:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #32 ; =32
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #16 ; =16
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    stur w0, [x29, #-4] ; 4-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x8, [sp] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov w8, #16
+; CHECK-O0-AARCH64-NEXT:    mov w0, w8
+; CHECK-O0-AARCH64-NEXT:    bl _malloc
+; CHECK-O0-AARCH64-NEXT:    ldr x8, [sp] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    mov x10, x0
+; CHECK-O0-AARCH64-NEXT:    ldur w0, [x29, #-4] ; 4-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    mov x21, x10
+; CHECK-O0-AARCH64-NEXT:    mov w9, #1
+; CHECK-O0-AARCH64-NEXT:    strb w9, [x10, #8]
+; CHECK-O0-AARCH64-NEXT:    str w0, [x8, #4]
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #32 ; =32
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: foo_sret:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #32 ; =32
+; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
+; CHECK-O0-ARM64_32-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #16
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w8
+; CHECK-O0-ARM64_32-NEXT:    bl _malloc
+; CHECK-O0-ARM64_32-NEXT:    ldr x8, [sp] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    mov x10, x0
+; CHECK-O0-ARM64_32-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    mov x21, x10
+; CHECK-O0-ARM64_32-NEXT:    mov w9, #1
+; CHECK-O0-ARM64_32-NEXT:    strb w9, [x10, #8]
+; CHECK-O0-ARM64_32-NEXT:    str w0, [x8, #4]
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #32 ; =32
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   %call = call i8* @malloc(i64 16)
   %call.0 = bitcast i8* %call to %swift_error*
@@ -279,32 +748,146 @@ entry:
 
 ; "caller3" calls "foo_sret" that takes a swifterror parameter.
 define float @caller3(i8* %error_ref) {
-; CHECK-APPLE-LABEL: caller3:
-; CHECK-APPLE: mov [[ID:x[0-9]+]], x0
-; CHECK-APPLE: mov x21, xzr
-; CHECK-APPLE: bl {{.*}}foo_sret
-; CHECK-APPLE: mov x0, x21
-; CHECK-APPLE-AARCH64: cbnz x21
-; CHECK-APPLE-ARM64_32: cbnz w0
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8]
-; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE: bl {{.*}}free
-
-; CHECK-O0-LABEL: caller3:
 ; spill x0
-; CHECK-O0: str x0, [sp, [[OFFSET:#[0-9]+]]]
-; CHECK-O0: mov x21
-; CHECK-O0: bl {{.*}}foo_sret
-; CHECK-O0: mov [[ID2:x[0-9]+]], x21
-; CHECK-O0-AARCH64: cbnz x21
-; CHECK-O0-ARM64_32: cmp x21, #0
 ; Access part of the error object and save it to error_ref
 ; reload from stack
-; CHECK-O0: ldr [[ID:x[0-9]+]], [sp, [[OFFSET]]]
-; CHECK-O0: ldrb [[CODE:w[0-9]+]]
-; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-O0: bl {{.*}}free
+; CHECK-APPLE-AARCH64-LABEL: caller3:
+; CHECK-APPLE-AARCH64:       ; %bb.0: ; %entry
+; CHECK-APPLE-AARCH64-NEXT:    sub sp, sp, #80 ; =80
+; CHECK-APPLE-AARCH64-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    add x29, sp, #64 ; =64
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w21, -40
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w22, -48
+; CHECK-APPLE-AARCH64-NEXT:    mov x19, x0
+; CHECK-APPLE-AARCH64-NEXT:    add x8, sp, #8 ; =8
+; CHECK-APPLE-AARCH64-NEXT:    mov w0, #1
+; CHECK-APPLE-AARCH64-NEXT:    mov x21, xzr
+; CHECK-APPLE-AARCH64-NEXT:    bl _foo_sret
+; CHECK-APPLE-AARCH64-NEXT:    mov x0, x21
+; CHECK-APPLE-AARCH64-NEXT:    cbnz x21, LBB6_2
+; CHECK-APPLE-AARCH64-NEXT:  ; %bb.1: ; %cont
+; CHECK-APPLE-AARCH64-NEXT:    ldrb w8, [x0, #8]
+; CHECK-APPLE-AARCH64-NEXT:    strb w8, [x19]
+; CHECK-APPLE-AARCH64-NEXT:  LBB6_2: ; %handler
+; CHECK-APPLE-AARCH64-NEXT:    bl _free
+; CHECK-APPLE-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-AARCH64-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    add sp, sp, #80 ; =80
+; CHECK-APPLE-AARCH64-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: caller3:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #96 ; =96
+; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #64] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #80 ; =80
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-AARCH64-NEXT:    ; implicit-def: $x1
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x21, xzr
+; CHECK-O0-AARCH64-NEXT:    add x8, sp, #40 ; =40
+; CHECK-O0-AARCH64-NEXT:    mov w0, #1
+; CHECK-O0-AARCH64-NEXT:    bl _foo_sret
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x0, x21
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    cbnz x21, LBB6_2
+; CHECK-O0-AARCH64-NEXT:  ; %bb.1: ; %cont
+; CHECK-O0-AARCH64-NEXT:    ldr x9, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldrb w8, [x8, #8]
+; CHECK-O0-AARCH64-NEXT:    strb w8, [x9]
+; CHECK-O0-AARCH64-NEXT:  LBB6_2: ; %handler
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    bl _free
+; CHECK-O0-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x22, x21, [sp, #64] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #96 ; =96
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-APPLE-ARM64_32-LABEL: caller3:
+; CHECK-APPLE-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-APPLE-ARM64_32-NEXT:    sub sp, sp, #80 ; =80
+; CHECK-APPLE-ARM64_32-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    add x29, sp, #64 ; =64
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w21, -40
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w22, -48
+; CHECK-APPLE-ARM64_32-NEXT:    mov x19, x0
+; CHECK-APPLE-ARM64_32-NEXT:    add x8, sp, #8 ; =8
+; CHECK-APPLE-ARM64_32-NEXT:    mov w0, #1
+; CHECK-APPLE-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-APPLE-ARM64_32-NEXT:    bl _foo_sret
+; CHECK-APPLE-ARM64_32-NEXT:    mov x0, x21
+; CHECK-APPLE-ARM64_32-NEXT:    cbnz w0, LBB6_2
+; CHECK-APPLE-ARM64_32-NEXT:  ; %bb.1: ; %cont
+; CHECK-APPLE-ARM64_32-NEXT:    ldrb w8, [x0, #8]
+; CHECK-APPLE-ARM64_32-NEXT:    strb w8, [x19]
+; CHECK-APPLE-ARM64_32-NEXT:  LBB6_2: ; %handler
+; CHECK-APPLE-ARM64_32-NEXT:    bl _free
+; CHECK-APPLE-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    add sp, sp, #80 ; =80
+; CHECK-APPLE-ARM64_32-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: caller3:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #96 ; =96
+; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #64] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x1
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-O0-ARM64_32-NEXT:    add x8, sp, #40 ; =40
+; CHECK-O0-ARM64_32-NEXT:    mov w0, #1
+; CHECK-O0-ARM64_32-NEXT:    bl _foo_sret
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x0, x21
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    cmp x21, #0 ; =0
+; CHECK-O0-ARM64_32-NEXT:    b.ne LBB6_2
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.1: ; %cont
+; CHECK-O0-ARM64_32-NEXT:    ldr x9, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x8, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldrb w8, [x8, #8]
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $w0 killed $w8
+; CHECK-O0-ARM64_32-NEXT:    strb w8, [x9]
+; CHECK-O0-ARM64_32-NEXT:  LBB6_2: ; %handler
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    bl _free
+; CHECK-O0-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldp x22, x21, [sp, #64] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #96 ; =96
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   %s = alloca %struct.S, align 8
   %error_ptr_ref = alloca swifterror %swift_error*
@@ -328,25 +911,151 @@ handler:
 ; variable number of arguments.
 declare void @llvm.va_start(i8*) nounwind
 define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
-; CHECK-APPLE-LABEL: foo_vararg:
-; CHECK-APPLE: mov w0, #16
-; CHECK-APPLE: malloc
-
 ; First vararg
-; CHECK-APPLE-AARCH64: mov [[ID:w[0-9]+]], #1
-; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP:x[0-9]+]], #16]
-; CHECK-APPLE-AARCH64: add [[ARGS:x[0-9]+]], [[TMP]], #16
 ; Third vararg
-; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #32]
-; CHECK-APPLE-AARCH64: strb [[ID]], [x0, #8]
 ; Second vararg
-; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #24]
-
-; CHECK-APPLE-ARM64_32: mov [[ID:w[0-9]+]], #1
-; CHECK-APPLE-ARM64_32: add [[ARGS:x[0-9]+]], [[TMP:x[0-9]+]], #16
-; CHECK-APPLE-ARM64_32: strb [[ID]], [x0, #8]
-
-
+; CHECK-APPLE-AARCH64-LABEL: foo_vararg:
+; CHECK-APPLE-AARCH64:       ; %bb.0: ; %entry
+; CHECK-APPLE-AARCH64-NEXT:    sub sp, sp, #48 ; =48
+; CHECK-APPLE-AARCH64-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    add x29, sp, #32 ; =32
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-AARCH64-NEXT:    mov w0, #16
+; CHECK-APPLE-AARCH64-NEXT:    bl _malloc
+; CHECK-APPLE-AARCH64-NEXT:    mov w8, #1
+; CHECK-APPLE-AARCH64-NEXT:    ldr w9, [x29, #16]
+; CHECK-APPLE-AARCH64-NEXT:    add x10, x29, #16 ; =16
+; CHECK-APPLE-AARCH64-NEXT:    ldr w11, [x29, #32]
+; CHECK-APPLE-AARCH64-NEXT:    strb w8, [x0, #8]
+; CHECK-APPLE-AARCH64-NEXT:    stur w9, [x29, #-12]
+; CHECK-APPLE-AARCH64-NEXT:    ldr w8, [x29, #24]
+; CHECK-APPLE-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-AARCH64-NEXT:    add x9, x10, #24 ; =24
+; CHECK-APPLE-AARCH64-NEXT:    stur x9, [x29, #-8]
+; CHECK-APPLE-AARCH64-NEXT:    stp w11, w8, [sp, #12]
+; CHECK-APPLE-AARCH64-NEXT:    mov x21, x0
+; CHECK-APPLE-AARCH64-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    add sp, sp, #48 ; =48
+; CHECK-APPLE-AARCH64-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: foo_vararg:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #48 ; =48
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #32 ; =32
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    mov w8, #16
+; CHECK-O0-AARCH64-NEXT:    mov w0, w8
+; CHECK-O0-AARCH64-NEXT:    bl _malloc
+; CHECK-O0-AARCH64-NEXT:    mov x21, x0
+; CHECK-O0-AARCH64-NEXT:    mov w8, #1
+; CHECK-O0-AARCH64-NEXT:    strb w8, [x0, #8]
+; CHECK-O0-AARCH64-NEXT:    add x8, x29, #16 ; =16
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-8]
+; CHECK-O0-AARCH64-NEXT:    ldur x8, [x29, #-8]
+; CHECK-O0-AARCH64-NEXT:    add x9, x8, #8 ; =8
+; CHECK-O0-AARCH64-NEXT:    stur x9, [x29, #-8]
+; CHECK-O0-AARCH64-NEXT:    ldr w8, [x8]
+; CHECK-O0-AARCH64-NEXT:    stur w8, [x29, #-12]
+; CHECK-O0-AARCH64-NEXT:    ldur x8, [x29, #-8]
+; CHECK-O0-AARCH64-NEXT:    add x9, x8, #8 ; =8
+; CHECK-O0-AARCH64-NEXT:    stur x9, [x29, #-8]
+; CHECK-O0-AARCH64-NEXT:    ldr w8, [x8]
+; CHECK-O0-AARCH64-NEXT:    str w8, [sp, #16]
+; CHECK-O0-AARCH64-NEXT:    ldur x8, [x29, #-8]
+; CHECK-O0-AARCH64-NEXT:    add x9, x8, #8 ; =8
+; CHECK-O0-AARCH64-NEXT:    stur x9, [x29, #-8]
+; CHECK-O0-AARCH64-NEXT:    ldr w8, [x8]
+; CHECK-O0-AARCH64-NEXT:    str w8, [sp, #12]
+; CHECK-O0-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #48 ; =48
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-APPLE-ARM64_32-LABEL: foo_vararg:
+; CHECK-APPLE-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-APPLE-ARM64_32-NEXT:    sub sp, sp, #48 ; =48
+; CHECK-APPLE-ARM64_32-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    add x29, sp, #32 ; =32
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-ARM64_32-NEXT:    mov w0, #16
+; CHECK-APPLE-ARM64_32-NEXT:    bl _malloc
+; CHECK-APPLE-ARM64_32-NEXT:    mov w8, #1
+; CHECK-APPLE-ARM64_32-NEXT:    add x9, x29, #16 ; =16
+; CHECK-APPLE-ARM64_32-NEXT:    strb w8, [x0, #8]
+; CHECK-APPLE-ARM64_32-NEXT:    orr w8, w9, #0x4
+; CHECK-APPLE-ARM64_32-NEXT:    and x10, x9, #0xfffffff0
+; CHECK-APPLE-ARM64_32-NEXT:    stur w8, [x29, #-8]
+; CHECK-APPLE-ARM64_32-NEXT:    ldr w11, [x10]
+; CHECK-APPLE-ARM64_32-NEXT:    orr w10, w9, #0x8
+; CHECK-APPLE-ARM64_32-NEXT:    stp w11, w10, [x29, #-12]
+; CHECK-APPLE-ARM64_32-NEXT:    ldr w8, [x8]
+; CHECK-APPLE-ARM64_32-NEXT:    orr w9, w9, #0xc
+; CHECK-APPLE-ARM64_32-NEXT:    str w8, [sp, #16]
+; CHECK-APPLE-ARM64_32-NEXT:    stur w9, [x29, #-8]
+; CHECK-APPLE-ARM64_32-NEXT:    ldr w8, [x10]
+; CHECK-APPLE-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-ARM64_32-NEXT:    str w8, [sp, #12]
+; CHECK-APPLE-ARM64_32-NEXT:    mov x21, x0
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    add sp, sp, #48 ; =48
+; CHECK-APPLE-ARM64_32-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: foo_vararg:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #48 ; =48
+; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #16
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w8
+; CHECK-O0-ARM64_32-NEXT:    bl _malloc
+; CHECK-O0-ARM64_32-NEXT:    mov x21, x0
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #1
+; CHECK-O0-ARM64_32-NEXT:    strb w8, [x0, #8]
+; CHECK-O0-ARM64_32-NEXT:    add x8, sp, #48 ; =48
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $w8 killed $w8 killed $x8
+; CHECK-O0-ARM64_32-NEXT:    str w8, [sp, #24]
+; CHECK-O0-ARM64_32-NEXT:    ldr w8, [sp, #24]
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $x8 killed $w8
+; CHECK-O0-ARM64_32-NEXT:    mov w9, w8
+; CHECK-O0-ARM64_32-NEXT:    add w9, w9, #4 ; =4
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x0
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w9
+; CHECK-O0-ARM64_32-NEXT:    mov w9, w0
+; CHECK-O0-ARM64_32-NEXT:    str w9, [sp, #24]
+; CHECK-O0-ARM64_32-NEXT:    ldr w8, [x8]
+; CHECK-O0-ARM64_32-NEXT:    str w8, [sp, #20]
+; CHECK-O0-ARM64_32-NEXT:    ldr w8, [sp, #24]
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $x8 killed $w8
+; CHECK-O0-ARM64_32-NEXT:    mov w9, w8
+; CHECK-O0-ARM64_32-NEXT:    add w9, w9, #4 ; =4
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x0
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w9
+; CHECK-O0-ARM64_32-NEXT:    mov w9, w0
+; CHECK-O0-ARM64_32-NEXT:    str w9, [sp, #24]
+; CHECK-O0-ARM64_32-NEXT:    ldr w8, [x8]
+; CHECK-O0-ARM64_32-NEXT:    str w8, [sp, #16]
+; CHECK-O0-ARM64_32-NEXT:    ldr w8, [sp, #24]
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $x8 killed $w8
+; CHECK-O0-ARM64_32-NEXT:    mov w9, w8
+; CHECK-O0-ARM64_32-NEXT:    add w9, w9, #4 ; =4
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x0
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w9
+; CHECK-O0-ARM64_32-NEXT:    mov w9, w0
+; CHECK-O0-ARM64_32-NEXT:    str w9, [sp, #24]
+; CHECK-O0-ARM64_32-NEXT:    ldr w8, [x8]
+; CHECK-O0-ARM64_32-NEXT:    str w8, [sp, #12]
+; CHECK-O0-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #48 ; =48
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   %call = call i8* @malloc(i64 16)
   %call.0 = bitcast i8* %call to %swift_error*
@@ -372,20 +1081,180 @@ entry:
 
 ; "caller4" calls "foo_vararg" that takes a swifterror parameter.
 define float @caller4(i8* %error_ref) {
-; CHECK-APPLE-LABEL: caller4:
-
-; CHECK-APPLE-AARCH64: mov [[ID:x[0-9]+]], x0
-; CHECK-APPLE-AARCH64: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
-; CHECK-APPLE-AARCH64: str {{x[0-9]+}}, [sp]
-
-; CHECK-APPLE-AARCH64: mov x21, xzr
-; CHECK-APPLE-AARCH64: bl {{.*}}foo_vararg
-; CHECK-APPLE-AARCH64: mov x0, x21
-; CHECK-APPLE-AARCH64: cbnz x21
 ; Access part of the error object and save it to error_ref
-; CHECK-APPLE-AARCH64: ldrb [[CODE:w[0-9]+]], [x0, #8]
-; CHECK-APPLE-AARCH64: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-APPLE-AARCH64: bl {{.*}}free
+; CHECK-APPLE-AARCH64-LABEL: caller4:
+; CHECK-APPLE-AARCH64:       ; %bb.0: ; %entry
+; CHECK-APPLE-AARCH64-NEXT:    sub sp, sp, #96 ; =96
+; CHECK-APPLE-AARCH64-NEXT:    stp x22, x21, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x20, x19, [sp, #64] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-APPLE-AARCH64-NEXT:    add x29, sp, #80 ; =80
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w21, -40
+; CHECK-APPLE-AARCH64-NEXT:    .cfi_offset w22, -48
+; CHECK-APPLE-AARCH64-NEXT:    mov x19, x0
+; CHECK-APPLE-AARCH64-NEXT:    mov w8, #10
+; CHECK-APPLE-AARCH64-NEXT:    mov w9, #11
+; CHECK-APPLE-AARCH64-NEXT:    mov w10, #12
+; CHECK-APPLE-AARCH64-NEXT:    stp w9, w8, [sp, #32]
+; CHECK-APPLE-AARCH64-NEXT:    str w10, [sp, #28]
+; CHECK-APPLE-AARCH64-NEXT:    stp x9, x10, [sp, #8]
+; CHECK-APPLE-AARCH64-NEXT:    str x8, [sp]
+; CHECK-APPLE-AARCH64-NEXT:    mov x21, xzr
+; CHECK-APPLE-AARCH64-NEXT:    bl _foo_vararg
+; CHECK-APPLE-AARCH64-NEXT:    mov x0, x21
+; CHECK-APPLE-AARCH64-NEXT:    cbnz x21, LBB8_2
+; CHECK-APPLE-AARCH64-NEXT:  ; %bb.1: ; %cont
+; CHECK-APPLE-AARCH64-NEXT:    ldrb w8, [x0, #8]
+; CHECK-APPLE-AARCH64-NEXT:    strb w8, [x19]
+; CHECK-APPLE-AARCH64-NEXT:  LBB8_2: ; %handler
+; CHECK-APPLE-AARCH64-NEXT:    bl _free
+; CHECK-APPLE-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-AARCH64-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp x20, x19, [sp, #64] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    ldp x22, x21, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-AARCH64-NEXT:    add sp, sp, #96 ; =96
+; CHECK-APPLE-AARCH64-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: caller4:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #112 ; =112
+; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #80] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #96] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #96 ; =96
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-AARCH64-NEXT:    ; implicit-def: $x1
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x21, xzr
+; CHECK-O0-AARCH64-NEXT:    mov w8, #10
+; CHECK-O0-AARCH64-NEXT:    stur w8, [x29, #-28]
+; CHECK-O0-AARCH64-NEXT:    mov w8, #11
+; CHECK-O0-AARCH64-NEXT:    stur w8, [x29, #-32]
+; CHECK-O0-AARCH64-NEXT:    mov w8, #12
+; CHECK-O0-AARCH64-NEXT:    stur w8, [x29, #-36]
+; CHECK-O0-AARCH64-NEXT:    ldur w8, [x29, #-28]
+; CHECK-O0-AARCH64-NEXT:    ; kill: def $x8 killed $w8
+; CHECK-O0-AARCH64-NEXT:    ldur w9, [x29, #-32]
+; CHECK-O0-AARCH64-NEXT:    mov w10, w9
+; CHECK-O0-AARCH64-NEXT:    ldur w9, [x29, #-36]
+; CHECK-O0-AARCH64-NEXT:    mov w11, w9
+; CHECK-O0-AARCH64-NEXT:    mov x9, sp
+; CHECK-O0-AARCH64-NEXT:    str x11, [x9, #16]
+; CHECK-O0-AARCH64-NEXT:    str x10, [x9, #8]
+; CHECK-O0-AARCH64-NEXT:    str x8, [x9]
+; CHECK-O0-AARCH64-NEXT:    bl _foo_vararg
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp, #40] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x0, x21
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #48] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    cbnz x21, LBB8_2
+; CHECK-O0-AARCH64-NEXT:  ; %bb.1: ; %cont
+; CHECK-O0-AARCH64-NEXT:    ldr x9, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x8, [sp, #40] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldrb w8, [x8, #8]
+; CHECK-O0-AARCH64-NEXT:    strb w8, [x9]
+; CHECK-O0-AARCH64-NEXT:  LBB8_2: ; %handler
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp, #48] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    bl _free
+; CHECK-O0-AARCH64-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #96] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x22, x21, [sp, #80] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #112 ; =112
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-APPLE-ARM64_32-LABEL: caller4:
+; CHECK-APPLE-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-APPLE-ARM64_32-NEXT:    sub sp, sp, #80 ; =80
+; CHECK-APPLE-ARM64_32-NEXT:    stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-APPLE-ARM64_32-NEXT:    add x29, sp, #64 ; =64
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w21, -40
+; CHECK-APPLE-ARM64_32-NEXT:    .cfi_offset w22, -48
+; CHECK-APPLE-ARM64_32-NEXT:    mov x19, x0
+; CHECK-APPLE-ARM64_32-NEXT:    mov w8, #10
+; CHECK-APPLE-ARM64_32-NEXT:    mov w9, #11
+; CHECK-APPLE-ARM64_32-NEXT:    mov w10, #12
+; CHECK-APPLE-ARM64_32-NEXT:    stp w9, w8, [sp, #20]
+; CHECK-APPLE-ARM64_32-NEXT:    str w10, [sp, #16]
+; CHECK-APPLE-ARM64_32-NEXT:    mov x9, #11
+; CHECK-APPLE-ARM64_32-NEXT:    movk x9, #12, lsl #32
+; CHECK-APPLE-ARM64_32-NEXT:    stur x9, [sp, #4]
+; CHECK-APPLE-ARM64_32-NEXT:    str w8, [sp]
+; CHECK-APPLE-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-APPLE-ARM64_32-NEXT:    bl _foo_vararg
+; CHECK-APPLE-ARM64_32-NEXT:    mov x0, x21
+; CHECK-APPLE-ARM64_32-NEXT:    cbnz w0, LBB8_2
+; CHECK-APPLE-ARM64_32-NEXT:  ; %bb.1: ; %cont
+; CHECK-APPLE-ARM64_32-NEXT:    ldrb w8, [x0, #8]
+; CHECK-APPLE-ARM64_32-NEXT:    strb w8, [x19]
+; CHECK-APPLE-ARM64_32-NEXT:  LBB8_2: ; %handler
+; CHECK-APPLE-ARM64_32-NEXT:    bl _free
+; CHECK-APPLE-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-ARM64_32-NEXT:    add sp, sp, #80 ; =80
+; CHECK-APPLE-ARM64_32-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: caller4:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #96 ; =96
+; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #64] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x1
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #10
+; CHECK-O0-ARM64_32-NEXT:    str w8, [sp, #56]
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #11
+; CHECK-O0-ARM64_32-NEXT:    str w8, [sp, #52]
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #12
+; CHECK-O0-ARM64_32-NEXT:    str w8, [sp, #48]
+; CHECK-O0-ARM64_32-NEXT:    ldr w8, [sp, #56]
+; CHECK-O0-ARM64_32-NEXT:    ldr w10, [sp, #52]
+; CHECK-O0-ARM64_32-NEXT:    ldr w11, [sp, #48]
+; CHECK-O0-ARM64_32-NEXT:    mov x9, sp
+; CHECK-O0-ARM64_32-NEXT:    str w11, [x9, #8]
+; CHECK-O0-ARM64_32-NEXT:    str w10, [x9, #4]
+; CHECK-O0-ARM64_32-NEXT:    str w8, [x9]
+; CHECK-O0-ARM64_32-NEXT:    bl _foo_vararg
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x0, x21
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #40] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    cmp x21, #0 ; =0
+; CHECK-O0-ARM64_32-NEXT:    b.ne LBB8_2
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.1: ; %cont
+; CHECK-O0-ARM64_32-NEXT:    ldr x9, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x8, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldrb w8, [x8, #8]
+; CHECK-O0-ARM64_32-NEXT:    ; kill: def $w0 killed $w8
+; CHECK-O0-ARM64_32-NEXT:    strb w8, [x9]
+; CHECK-O0-ARM64_32-NEXT:  LBB8_2: ; %handler
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #40] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    bl _free
+; CHECK-O0-ARM64_32-NEXT:    fmov s0, #1.00000000
+; CHECK-O0-ARM64_32-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldp x22, x21, [sp, #64] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #96 ; =96
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr_ref
@@ -418,91 +1287,350 @@ handler:
 
 ; Check that we don't blow up on tail-calling functions with swifterror arguments.
 define float @tailcallswifterror(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-APPLE-LABEL: tailcallswifterror:
+; CHECK-APPLE:       ; %bb.0: ; %entry
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov x29, sp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    bl _tailcallswifterror
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: tailcallswifterror:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x29, sp
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    bl _tailcallswifterror
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: tailcallswifterror:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #-16]! ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
+; CHECK-O0-ARM64_32-NEXT:    bl _tailcallswifterror
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp], #16 ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   %0 = tail call float @tailcallswifterror(%swift_error** swifterror %error_ptr_ref)
   ret float %0
 }
 define swiftcc float @tailcallswifterror_swiftcc(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-APPLE-LABEL: tailcallswifterror_swiftcc:
+; CHECK-APPLE:       ; %bb.0: ; %entry
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov x29, sp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    bl _tailcallswifterror_swiftcc
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: tailcallswifterror_swiftcc:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x29, sp
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    bl _tailcallswifterror_swiftcc
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: tailcallswifterror_swiftcc:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #-16]! ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
+; CHECK-O0-ARM64_32-NEXT:    bl _tailcallswifterror_swiftcc
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp], #16 ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   %0 = tail call swiftcc float @tailcallswifterror_swiftcc(%swift_error** swifterror %error_ptr_ref)
   ret float %0
 }
 
-; CHECK-APPLE-LABEL: swifterror_clobber
-; CHECK-APPLE: mov [[REG:x[0-9]+]], x21
-; CHECK-APPLE: nop
-; CHECK-APPLE: mov x21, [[REG]]
 define swiftcc void @swifterror_clobber(%swift_error** nocapture swifterror %err) {
+; CHECK-APPLE-LABEL: swifterror_clobber:
+; CHECK-APPLE:       ; %bb.0:
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov x29, sp
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    mov x8, x21
+; CHECK-APPLE-NEXT:    ; InlineAsm Start
+; CHECK-APPLE-NEXT:    nop
+; CHECK-APPLE-NEXT:    ; InlineAsm End
+; CHECK-APPLE-NEXT:    mov x21, x8
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: swifterror_clobber:
+; CHECK-O0-AARCH64:       ; %bb.0:
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #32 ; =32
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #16 ; =16
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    ; InlineAsm Start
+; CHECK-O0-AARCH64-NEXT:    nop
+; CHECK-O0-AARCH64-NEXT:    ; InlineAsm End
+; CHECK-O0-AARCH64-NEXT:    ldr x21, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #32 ; =32
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: swifterror_clobber:
+; CHECK-O0-ARM64_32:       ; %bb.0:
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #16 ; =16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    ; InlineAsm Start
+; CHECK-O0-ARM64_32-NEXT:    nop
+; CHECK-O0-ARM64_32-NEXT:    ; InlineAsm End
+; CHECK-O0-ARM64_32-NEXT:    ldr x21, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #16 ; =16
+; CHECK-O0-ARM64_32-NEXT:    ret
   call void asm sideeffect "nop", "~{x21}"()
   ret void
 }
 
-; CHECK-APPLE-LABEL: swifterror_reg_clobber
-; CHECK-APPLE: stp {{.*}}x21
-; CHECK-APPLE: nop
-; CHECK-APPLE: ldp  {{.*}}x21
 define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) {
+; CHECK-APPLE-LABEL: swifterror_reg_clobber:
+; CHECK-APPLE:       ; %bb.0:
+; CHECK-APPLE-NEXT:    stp x22, x21, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    add x29, sp, #16 ; =16
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    .cfi_offset w21, -24
+; CHECK-APPLE-NEXT:    .cfi_offset w22, -32
+; CHECK-APPLE-NEXT:    ; InlineAsm Start
+; CHECK-APPLE-NEXT:    nop
+; CHECK-APPLE-NEXT:    ; InlineAsm End
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x22, x21, [sp], #32 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: swifterror_reg_clobber:
+; CHECK-O0-AARCH64:       ; %bb.0:
+; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #16 ; =16
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-AARCH64-NEXT:    ; InlineAsm Start
+; CHECK-O0-AARCH64-NEXT:    nop
+; CHECK-O0-AARCH64-NEXT:    ; InlineAsm End
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x22, x21, [sp], #32 ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: swifterror_reg_clobber:
+; CHECK-O0-ARM64_32:       ; %bb.0:
+; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -8
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w22, -16
+; CHECK-O0-ARM64_32-NEXT:    ; InlineAsm Start
+; CHECK-O0-ARM64_32-NEXT:    nop
+; CHECK-O0-ARM64_32-NEXT:    ; InlineAsm End
+; CHECK-O0-ARM64_32-NEXT:    ldp x22, x21, [sp], #16 ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ret
   call void asm sideeffect "nop", "~{x21}"()
   ret void
 }
-; CHECK-APPLE-LABEL: params_in_reg
 ; Save callee-saved registers and swifterror since they will be clobbered by the first call to params_in_reg2.
-; CHECK-APPLE:  stp     x21, x28, [sp
-; CHECK-APPLE:  stp     x27, x26, [sp
-; CHECK-APPLE:  stp     x25, x24, [sp
-; CHECK-APPLE:  stp     x23, x22, [sp
-; CHECK-APPLE:  stp     x20, x19, [sp
-; CHECK-APPLE:  stp     x29, x30, [sp
-; CHECK-APPLE:  str     x20, [sp
 ; Store argument registers.
-; CHECK-APPLE:  mov      x23, x7
-; CHECK-APPLE:  mov      x24, x6
-; CHECK-APPLE:  mov      x25, x5
-; CHECK-APPLE:  mov      x26, x4
-; CHECK-APPLE:  mov      x27, x3
-; CHECK-APPLE:  mov      x28, x2
-; CHECK-APPLE:  mov      x19, x1
-; CHECK-APPLE:  mov      x22, x0
 ; Set up the call.
-; CHECK-APPLE:  mov     w0, #1
-; CHECK-APPLE:  mov     w1, #2
-; CHECK-APPLE:  mov     w2, #3
-; CHECK-APPLE:  mov     w3, #4
-; CHECK-APPLE:  mov     w4, #5
-; CHECK-APPLE:  mov     w5, #6
-; CHECK-APPLE:  mov     w6, #7
-; CHECK-APPLE:  mov     w7, #8
-; CHECK-APPLE:  mov      x20, xzr
-; CHECK-APPLE:  mov      x21, xzr
-; CHECK-APPLE:  bl      _params_in_reg2
 ; Restore original arguments for next call.
-; CHECK-APPLE:  mov      x0, x22
-; CHECK-APPLE:  mov      x1, x19
-; CHECK-APPLE:  mov      x2, x28
-; CHECK-APPLE:  mov      x3, x27
-; CHECK-APPLE:  mov      x4, x26
-; CHECK-APPLE:  mov      x5, x25
-; CHECK-APPLE:  mov      x6, x24
-; CHECK-APPLE:  mov      x7, x23
 ; Restore original swiftself argument and swifterror %err.
-; CHECK-APPLE:  ldp             x20, x21, [sp
-; CHECK-APPLE:  bl      _params_in_reg2
 ; Restore callee-saved registers but don't clobber swifterror x21.
-; CHECK-APPLE-NOT: x21
-; CHECK-APPLE:  ldp     x29, x30, [sp
-; CHECK-APPLE-NOT: x21
-; CHECK-APPLE:  ldp     x20, x19, [sp
-; CHECK-APPLE-NOT: x21
-; CHECK-APPLE:  ldp     x23, x22, [sp
-; CHECK-APPLE-NOT: x21
-; CHECK-APPLE:  ldp     x25, x24, [sp
-; CHECK-APPLE-NOT: x21
-; CHECK-APPLE:  ldp     x27, x26, [sp
-; CHECK-APPLE-NOT: x21
-; CHECK-APPLE:  ldr     x28, [sp
-; CHECK-APPLE-NOT: x21
-; CHECK-APPLE:  ret
 define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err) {
+; CHECK-APPLE-LABEL: params_in_reg:
+; CHECK-APPLE:       ; %bb.0:
+; CHECK-APPLE-NEXT:    sub sp, sp, #112 ; =112
+; CHECK-APPLE-NEXT:    stp x21, x28, [sp, #8] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x27, x26, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x25, x24, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x23, x22, [sp, #64] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x20, x19, [sp, #80] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #96] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    add x29, sp, #96 ; =96
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-NEXT:    .cfi_offset w22, -40
+; CHECK-APPLE-NEXT:    .cfi_offset w23, -48
+; CHECK-APPLE-NEXT:    .cfi_offset w24, -56
+; CHECK-APPLE-NEXT:    .cfi_offset w25, -64
+; CHECK-APPLE-NEXT:    .cfi_offset w26, -72
+; CHECK-APPLE-NEXT:    .cfi_offset w27, -80
+; CHECK-APPLE-NEXT:    .cfi_offset w28, -96
+; CHECK-APPLE-NEXT:    str x20, [sp] ; 8-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov x23, x7
+; CHECK-APPLE-NEXT:    mov x24, x6
+; CHECK-APPLE-NEXT:    mov x25, x5
+; CHECK-APPLE-NEXT:    mov x26, x4
+; CHECK-APPLE-NEXT:    mov x27, x3
+; CHECK-APPLE-NEXT:    mov x28, x2
+; CHECK-APPLE-NEXT:    mov x19, x1
+; CHECK-APPLE-NEXT:    mov x22, x0
+; CHECK-APPLE-NEXT:    mov w0, #1
+; CHECK-APPLE-NEXT:    mov w1, #2
+; CHECK-APPLE-NEXT:    mov w2, #3
+; CHECK-APPLE-NEXT:    mov w3, #4
+; CHECK-APPLE-NEXT:    mov w4, #5
+; CHECK-APPLE-NEXT:    mov w5, #6
+; CHECK-APPLE-NEXT:    mov w6, #7
+; CHECK-APPLE-NEXT:    mov w7, #8
+; CHECK-APPLE-NEXT:    mov x20, xzr
+; CHECK-APPLE-NEXT:    mov x21, xzr
+; CHECK-APPLE-NEXT:    bl _params_in_reg2
+; CHECK-APPLE-NEXT:    mov x0, x22
+; CHECK-APPLE-NEXT:    mov x1, x19
+; CHECK-APPLE-NEXT:    mov x2, x28
+; CHECK-APPLE-NEXT:    mov x3, x27
+; CHECK-APPLE-NEXT:    mov x4, x26
+; CHECK-APPLE-NEXT:    mov x5, x25
+; CHECK-APPLE-NEXT:    mov x6, x24
+; CHECK-APPLE-NEXT:    mov x7, x23
+; CHECK-APPLE-NEXT:    ldp x20, x21, [sp] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    bl _params_in_reg2
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #96] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x20, x19, [sp, #80] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x23, x22, [sp, #64] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x25, x24, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x27, x26, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldr x28, [sp, #16] ; 8-byte Folded Reload
+; CHECK-APPLE-NEXT:    add sp, sp, #112 ; =112
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: params_in_reg:
+; CHECK-O0-AARCH64:       ; %bb.0:
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #128 ; =128
+; CHECK-O0-AARCH64-NEXT:    str x20, [sp, #96] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #112] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #112 ; =112
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w20, -32
+; CHECK-O0-AARCH64-NEXT:    stur x21, [x29, #-32] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x20, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stur x7, [x29, #-40] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stur x6, [x29, #-48] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x5, [sp, #56] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x4, [sp, #48] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x3, [sp, #40] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x2, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x1, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    ; implicit-def: $x0
+; CHECK-O0-AARCH64-NEXT:    mov x20, xzr
+; CHECK-O0-AARCH64-NEXT:    mov x21, x20
+; CHECK-O0-AARCH64-NEXT:    mov w8, #1
+; CHECK-O0-AARCH64-NEXT:    mov w0, w8
+; CHECK-O0-AARCH64-NEXT:    mov w8, #2
+; CHECK-O0-AARCH64-NEXT:    mov w1, w8
+; CHECK-O0-AARCH64-NEXT:    mov w8, #3
+; CHECK-O0-AARCH64-NEXT:    mov w2, w8
+; CHECK-O0-AARCH64-NEXT:    mov w8, #4
+; CHECK-O0-AARCH64-NEXT:    mov w3, w8
+; CHECK-O0-AARCH64-NEXT:    mov w8, #5
+; CHECK-O0-AARCH64-NEXT:    mov w4, w8
+; CHECK-O0-AARCH64-NEXT:    mov w8, #6
+; CHECK-O0-AARCH64-NEXT:    mov w5, w8
+; CHECK-O0-AARCH64-NEXT:    mov w8, #7
+; CHECK-O0-AARCH64-NEXT:    mov w6, w8
+; CHECK-O0-AARCH64-NEXT:    mov w8, #8
+; CHECK-O0-AARCH64-NEXT:    mov w7, w8
+; CHECK-O0-AARCH64-NEXT:    bl _params_in_reg2
+; CHECK-O0-AARCH64-NEXT:    ldr x20, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x1, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x2, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x3, [sp, #40] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x4, [sp, #48] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x5, [sp, #56] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x6, [x29, #-48] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x7, [x29, #-40] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    mov x8, x21
+; CHECK-O0-AARCH64-NEXT:    ldur x21, [x29, #-32] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    bl _params_in_reg2
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #112] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x20, [sp, #96] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #128 ; =128
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: params_in_reg:
+; CHECK-O0-ARM64_32:       ; %bb.0:
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #112 ; =112
+; CHECK-O0-ARM64_32-NEXT:    stp x20, x30, [sp, #96] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w20, -16
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #80] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x20, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x7, [sp, #72] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x6, [sp, #64] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x5, [sp, #56] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x4, [sp, #48] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x3, [sp, #40] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x2, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x1, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x0
+; CHECK-O0-ARM64_32-NEXT:    mov x20, xzr
+; CHECK-O0-ARM64_32-NEXT:    mov x21, x20
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #1
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w8
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #2
+; CHECK-O0-ARM64_32-NEXT:    mov w1, w8
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #3
+; CHECK-O0-ARM64_32-NEXT:    mov w2, w8
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #4
+; CHECK-O0-ARM64_32-NEXT:    mov w3, w8
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #5
+; CHECK-O0-ARM64_32-NEXT:    mov w4, w8
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #6
+; CHECK-O0-ARM64_32-NEXT:    mov w5, w8
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #7
+; CHECK-O0-ARM64_32-NEXT:    mov w6, w8
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #8
+; CHECK-O0-ARM64_32-NEXT:    mov w7, w8
+; CHECK-O0-ARM64_32-NEXT:    bl _params_in_reg2
+; CHECK-O0-ARM64_32-NEXT:    ldr x20, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x1, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x2, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x3, [sp, #40] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x4, [sp, #48] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x5, [sp, #56] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x6, [sp, #64] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x7, [sp, #72] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x21
+; CHECK-O0-ARM64_32-NEXT:    ldr x21, [sp, #80] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    bl _params_in_reg2
+; CHECK-O0-ARM64_32-NEXT:    ldp x20, x30, [sp, #96] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #112 ; =112
+; CHECK-O0-ARM64_32-NEXT:    ret
   %error_ptr_ref = alloca swifterror %swift_error*, align 8
   store %swift_error* null, %swift_error** %error_ptr_ref
   call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i8* swiftself null, %swift_error** nocapture swifterror %error_ptr_ref)
@@ -511,91 +1639,319 @@ define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* s
 }
 declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err)
 
-; CHECK-APPLE-LABEL: params_and_return_in_reg
 ; Store callee-saved registers.
-; CHECK-APPLE:  stp     x20, x28, [sp, #24
-; CHECK-APPLE:  stp     x27, x26, [sp
-; CHECK-APPLE:  stp     x25, x24, [sp
-; CHECK-APPLE:  stp     x23, x22, [sp
-; CHECK-APPLE:  stp     x20, x19, [sp
-; CHECK-APPLE:  stp     x29, x30, [sp
 ; Save original arguments.
-; CHECK-APPLE:  mov      x23, x21
-; CHECK-APPLE:  str     x7, [sp, #16]
-; CHECK-APPLE:  mov      x24, x6
-; CHECK-APPLE:  mov      x25, x5
-; CHECK-APPLE:  mov      x26, x4
-; CHECK-APPLE:  mov      x27, x3
-; CHECK-APPLE:  mov      x28, x2
-; CHECK-APPLE:  mov      x19, x1
-; CHECK-APPLE:  mov      x22, x0
 ; Set up call arguments.
-; CHECK-APPLE:  mov     w0, #1
-; CHECK-APPLE:  mov     w1, #2
-; CHECK-APPLE:  mov     w2, #3
-; CHECK-APPLE:  mov     w3, #4
-; CHECK-APPLE:  mov     w4, #5
-; CHECK-APPLE:  mov     w5, #6
-; CHECK-APPLE:  mov     w6, #7
-; CHECK-APPLE:  mov     w7, #8
-; CHECK-APPLE:  mov      x20, xzr
-; CHECK-APPLE:  mov      x21, xzr
-; CHECK-APPLE:  bl      _params_in_reg2
 ; Store swifterror %error_ptr_ref.
-; CHECK-APPLE:  str     x21, [sp, #8]
 ; Set up call arguments from original arguments.
-; CHECK-APPLE:  mov      x0, x22
-; CHECK-APPLE:  mov      x1, x19
-; CHECK-APPLE:  mov      x2, x28
-; CHECK-APPLE:  mov      x3, x27
-; CHECK-APPLE:  mov      x4, x26
-; CHECK-APPLE:  mov      x5, x25
-; CHECK-APPLE:  mov      x6, x24
-; CHECK-APPLE:  ldp     x7, x20, [sp, #16]
-; CHECK-APPLE:  mov      x21, x23
-; CHECK-APPLE:  bl      _params_and_return_in_reg2
 ; Store return values.
-; CHECK-APPLE:  mov      x19, x0
-; CHECK-APPLE:  mov      x22, x1
-; CHECK-APPLE:  mov      x24, x2
-; CHECK-APPLE:  mov      x25, x3
-; CHECK-APPLE:  mov      x26, x4
-; CHECK-APPLE:  mov      x27, x5
-; CHECK-APPLE:  mov      x28, x6
-; CHECK-APPLE:  mov      x23, x7
 ; Save swifterror %err.
-; CHECK-APPLE:  str     x21, [sp, #24]
 ; Set up the call.
-; CHECK-APPLE:  mov     w0, #1
-; CHECK-APPLE:  mov     w1, #2
-; CHECK-APPLE:  mov     w2, #3
-; CHECK-APPLE:  mov     w3, #4
-; CHECK-APPLE:  mov     w4, #5
-; CHECK-APPLE:  mov     w5, #6
-; CHECK-APPLE:  mov     w6, #7
-; CHECK-APPLE:  mov     w7, #8
-; CHECK-APPLE:  mov     x20, xzr
 ; ... set up the call with swifterror %error_ptr_ref.
-; CHECK-APPLE:  ldr     x21, [sp, #8]
-; CHECK-APPLE:  bl      _params_in_reg2
 ; Restore return values for return from this function.
-; CHECK-APPLE:  mov      x0, x19
-; CHECK-APPLE:  mov      x1, x22
-; CHECK-APPLE:  mov      x2, x24
-; CHECK-APPLE:  mov      x3, x25
-; CHECK-APPLE:  mov      x4, x26
-; CHECK-APPLE:  mov      x5, x27
-; CHECK-APPLE:  mov      x6, x28
-; CHECK-APPLE:  mov      x7, x23
 ; Restore swifterror %err and callee-saved registers.
-; CHECK-APPLE:  ldp     x21, x28, [sp, #24
-; CHECK-APPLE:  ldp     x29, x30, [sp
-; CHECK-APPLE:  ldp     x20, x19, [sp
-; CHECK-APPLE:  ldp     x23, x22, [sp
-; CHECK-APPLE:  ldp     x25, x24, [sp
-; CHECK-APPLE:  ldp     x27, x26, [sp
-; CHECK-APPLE:  ret
 define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err) {
+; CHECK-APPLE-LABEL: params_and_return_in_reg:
+; CHECK-APPLE:       ; %bb.0:
+; CHECK-APPLE-NEXT:    sub sp, sp, #128 ; =128
+; CHECK-APPLE-NEXT:    stp x20, x28, [sp, #24] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x27, x26, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x25, x24, [sp, #64] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x23, x22, [sp, #80] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x20, x19, [sp, #96] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #112] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    add x29, sp, #112 ; =112
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-NEXT:    .cfi_offset w20, -32
+; CHECK-APPLE-NEXT:    .cfi_offset w22, -40
+; CHECK-APPLE-NEXT:    .cfi_offset w23, -48
+; CHECK-APPLE-NEXT:    .cfi_offset w24, -56
+; CHECK-APPLE-NEXT:    .cfi_offset w25, -64
+; CHECK-APPLE-NEXT:    .cfi_offset w26, -72
+; CHECK-APPLE-NEXT:    .cfi_offset w27, -80
+; CHECK-APPLE-NEXT:    .cfi_offset w28, -96
+; CHECK-APPLE-NEXT:    mov x23, x21
+; CHECK-APPLE-NEXT:    str x7, [sp, #16] ; 8-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov x24, x6
+; CHECK-APPLE-NEXT:    mov x25, x5
+; CHECK-APPLE-NEXT:    mov x26, x4
+; CHECK-APPLE-NEXT:    mov x27, x3
+; CHECK-APPLE-NEXT:    mov x28, x2
+; CHECK-APPLE-NEXT:    mov x19, x1
+; CHECK-APPLE-NEXT:    mov x22, x0
+; CHECK-APPLE-NEXT:    mov w0, #1
+; CHECK-APPLE-NEXT:    mov w1, #2
+; CHECK-APPLE-NEXT:    mov w2, #3
+; CHECK-APPLE-NEXT:    mov w3, #4
+; CHECK-APPLE-NEXT:    mov w4, #5
+; CHECK-APPLE-NEXT:    mov w5, #6
+; CHECK-APPLE-NEXT:    mov w6, #7
+; CHECK-APPLE-NEXT:    mov w7, #8
+; CHECK-APPLE-NEXT:    mov x20, xzr
+; CHECK-APPLE-NEXT:    mov x21, xzr
+; CHECK-APPLE-NEXT:    bl _params_in_reg2
+; CHECK-APPLE-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov x0, x22
+; CHECK-APPLE-NEXT:    mov x1, x19
+; CHECK-APPLE-NEXT:    mov x2, x28
+; CHECK-APPLE-NEXT:    mov x3, x27
+; CHECK-APPLE-NEXT:    mov x4, x26
+; CHECK-APPLE-NEXT:    mov x5, x25
+; CHECK-APPLE-NEXT:    mov x6, x24
+; CHECK-APPLE-NEXT:    ldp x7, x20, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    mov x21, x23
+; CHECK-APPLE-NEXT:    bl _params_and_return_in_reg2
+; CHECK-APPLE-NEXT:    mov x19, x0
+; CHECK-APPLE-NEXT:    mov x22, x1
+; CHECK-APPLE-NEXT:    mov x24, x2
+; CHECK-APPLE-NEXT:    mov x25, x3
+; CHECK-APPLE-NEXT:    mov x26, x4
+; CHECK-APPLE-NEXT:    mov x27, x5
+; CHECK-APPLE-NEXT:    mov x28, x6
+; CHECK-APPLE-NEXT:    mov x23, x7
+; CHECK-APPLE-NEXT:    str x21, [sp, #24] ; 8-byte Folded Spill
+; CHECK-APPLE-NEXT:    mov w0, #1
+; CHECK-APPLE-NEXT:    mov w1, #2
+; CHECK-APPLE-NEXT:    mov w2, #3
+; CHECK-APPLE-NEXT:    mov w3, #4
+; CHECK-APPLE-NEXT:    mov w4, #5
+; CHECK-APPLE-NEXT:    mov w5, #6
+; CHECK-APPLE-NEXT:    mov w6, #7
+; CHECK-APPLE-NEXT:    mov w7, #8
+; CHECK-APPLE-NEXT:    mov x20, xzr
+; CHECK-APPLE-NEXT:    ldr x21, [sp, #8] ; 8-byte Folded Reload
+; CHECK-APPLE-NEXT:    bl _params_in_reg2
+; CHECK-APPLE-NEXT:    mov x0, x19
+; CHECK-APPLE-NEXT:    mov x1, x22
+; CHECK-APPLE-NEXT:    mov x2, x24
+; CHECK-APPLE-NEXT:    mov x3, x25
+; CHECK-APPLE-NEXT:    mov x4, x26
+; CHECK-APPLE-NEXT:    mov x5, x27
+; CHECK-APPLE-NEXT:    mov x6, x28
+; CHECK-APPLE-NEXT:    mov x7, x23
+; CHECK-APPLE-NEXT:    ldp x21, x28, [sp, #24] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #112] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x20, x19, [sp, #96] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x23, x22, [sp, #80] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x25, x24, [sp, #64] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x27, x26, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    add sp, sp, #128 ; =128
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: params_and_return_in_reg:
+; CHECK-O0-AARCH64:       ; %bb.0:
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #272 ; =272
+; CHECK-O0-AARCH64-NEXT:    stp x28, x20, [sp, #240] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #256] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #256 ; =256
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w20, -24
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w28, -32
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp, #72] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x20, [sp] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x7, [sp, #64] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x6, [sp, #56] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x5, [sp, #48] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x4, [sp, #40] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x3, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x2, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x1, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    ; implicit-def: $x0
+; CHECK-O0-AARCH64-NEXT:    mov x20, xzr
+; CHECK-O0-AARCH64-NEXT:    str x20, [sp, #80] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x21, x20
+; CHECK-O0-AARCH64-NEXT:    mov w8, #1
+; CHECK-O0-AARCH64-NEXT:    mov w0, w8
+; CHECK-O0-AARCH64-NEXT:    str x0, [sp, #88] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov w8, #2
+; CHECK-O0-AARCH64-NEXT:    mov w1, w8
+; CHECK-O0-AARCH64-NEXT:    str x1, [sp, #96] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov w8, #3
+; CHECK-O0-AARCH64-NEXT:    mov w2, w8
+; CHECK-O0-AARCH64-NEXT:    str x2, [sp, #104] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov w8, #4
+; CHECK-O0-AARCH64-NEXT:    mov w3, w8
+; CHECK-O0-AARCH64-NEXT:    str x3, [sp, #112] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov w8, #5
+; CHECK-O0-AARCH64-NEXT:    mov w4, w8
+; CHECK-O0-AARCH64-NEXT:    str x4, [sp, #120] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov w8, #6
+; CHECK-O0-AARCH64-NEXT:    mov w5, w8
+; CHECK-O0-AARCH64-NEXT:    str x5, [sp, #128] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov w8, #7
+; CHECK-O0-AARCH64-NEXT:    mov w6, w8
+; CHECK-O0-AARCH64-NEXT:    stur x6, [x29, #-120] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov w8, #8
+; CHECK-O0-AARCH64-NEXT:    mov w7, w8
+; CHECK-O0-AARCH64-NEXT:    stur x7, [x29, #-112] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    bl _params_in_reg2
+; CHECK-O0-AARCH64-NEXT:    ldr x20, [sp] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x1, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x2, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x3, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x4, [sp, #40] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x5, [sp, #48] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x6, [sp, #56] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldr x7, [sp, #64] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    mov x8, x21
+; CHECK-O0-AARCH64-NEXT:    ldr x21, [sp, #72] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-104] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    bl _params_and_return_in_reg2
+; CHECK-O0-AARCH64-NEXT:    ldr x20, [sp, #80] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    mov x8, x0
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp, #88] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-96] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x8, x1
+; CHECK-O0-AARCH64-NEXT:    ldr x1, [sp, #96] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-88] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x8, x2
+; CHECK-O0-AARCH64-NEXT:    ldr x2, [sp, #104] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-80] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x8, x3
+; CHECK-O0-AARCH64-NEXT:    ldr x3, [sp, #112] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-72] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x8, x4
+; CHECK-O0-AARCH64-NEXT:    ldr x4, [sp, #120] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-64] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x8, x5
+; CHECK-O0-AARCH64-NEXT:    ldr x5, [sp, #128] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-56] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x8, x6
+; CHECK-O0-AARCH64-NEXT:    ldur x6, [x29, #-120] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-48] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x8, x7
+; CHECK-O0-AARCH64-NEXT:    ldur x7, [x29, #-112] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-40] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x8, x21
+; CHECK-O0-AARCH64-NEXT:    ldur x21, [x29, #-104] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    stur x8, [x29, #-32] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    bl _params_in_reg2
+; CHECK-O0-AARCH64-NEXT:    ldur x0, [x29, #-96] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x1, [x29, #-88] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x2, [x29, #-80] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x3, [x29, #-72] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x4, [x29, #-64] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x5, [x29, #-56] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x6, [x29, #-48] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldur x7, [x29, #-40] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    mov x8, x21
+; CHECK-O0-AARCH64-NEXT:    ldur x21, [x29, #-32] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #256] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x28, x20, [sp, #240] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #272 ; =272
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: params_and_return_in_reg:
+; CHECK-O0-ARM64_32:       ; %bb.0:
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #272 ; =272
+; CHECK-O0-ARM64_32-NEXT:    str x28, [sp, #240] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    stp x20, x30, [sp, #256] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 272
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w20, -16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w28, -32
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #72] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x20, [sp] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x7, [sp, #64] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x6, [sp, #56] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x5, [sp, #48] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x4, [sp, #40] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x3, [sp, #32] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x2, [sp, #24] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x1, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x0
+; CHECK-O0-ARM64_32-NEXT:    mov x20, xzr
+; CHECK-O0-ARM64_32-NEXT:    str x20, [sp, #80] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x21, x20
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #1
+; CHECK-O0-ARM64_32-NEXT:    mov w0, w8
+; CHECK-O0-ARM64_32-NEXT:    str x0, [sp, #88] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #2
+; CHECK-O0-ARM64_32-NEXT:    mov w1, w8
+; CHECK-O0-ARM64_32-NEXT:    str x1, [sp, #96] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #3
+; CHECK-O0-ARM64_32-NEXT:    mov w2, w8
+; CHECK-O0-ARM64_32-NEXT:    str x2, [sp, #104] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #4
+; CHECK-O0-ARM64_32-NEXT:    mov w3, w8
+; CHECK-O0-ARM64_32-NEXT:    str x3, [sp, #112] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #5
+; CHECK-O0-ARM64_32-NEXT:    mov w4, w8
+; CHECK-O0-ARM64_32-NEXT:    str x4, [sp, #120] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #6
+; CHECK-O0-ARM64_32-NEXT:    mov w5, w8
+; CHECK-O0-ARM64_32-NEXT:    str x5, [sp, #128] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #7
+; CHECK-O0-ARM64_32-NEXT:    mov w6, w8
+; CHECK-O0-ARM64_32-NEXT:    str x6, [sp, #136] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov w8, #8
+; CHECK-O0-ARM64_32-NEXT:    mov w7, w8
+; CHECK-O0-ARM64_32-NEXT:    str x7, [sp, #144] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    bl _params_in_reg2
+; CHECK-O0-ARM64_32-NEXT:    ldr x20, [sp] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x1, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x2, [sp, #24] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x3, [sp, #32] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x4, [sp, #40] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x5, [sp, #48] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x6, [sp, #56] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x7, [sp, #64] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x21
+; CHECK-O0-ARM64_32-NEXT:    ldr x21, [sp, #72] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #152] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    bl _params_and_return_in_reg2
+; CHECK-O0-ARM64_32-NEXT:    ldr x20, [sp, #80] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x0
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #88] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #160] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x1
+; CHECK-O0-ARM64_32-NEXT:    ldr x1, [sp, #96] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #168] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x2
+; CHECK-O0-ARM64_32-NEXT:    ldr x2, [sp, #104] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #176] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x3
+; CHECK-O0-ARM64_32-NEXT:    ldr x3, [sp, #112] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #184] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x4
+; CHECK-O0-ARM64_32-NEXT:    ldr x4, [sp, #120] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #192] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x5
+; CHECK-O0-ARM64_32-NEXT:    ldr x5, [sp, #128] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #200] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x6
+; CHECK-O0-ARM64_32-NEXT:    ldr x6, [sp, #136] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #208] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x7
+; CHECK-O0-ARM64_32-NEXT:    ldr x7, [sp, #144] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #216] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x21
+; CHECK-O0-ARM64_32-NEXT:    ldr x21, [sp, #152] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    str x8, [sp, #224] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    bl _params_in_reg2
+; CHECK-O0-ARM64_32-NEXT:    ldr x0, [sp, #160] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x1, [sp, #168] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x2, [sp, #176] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x3, [sp, #184] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x4, [sp, #192] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x5, [sp, #200] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x6, [sp, #208] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x7, [sp, #216] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    mov x8, x21
+; CHECK-O0-ARM64_32-NEXT:    ldr x21, [sp, #224] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldp x20, x30, [sp, #256] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x28, [sp, #240] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #272 ; =272
+; CHECK-O0-ARM64_32-NEXT:    ret
   %error_ptr_ref = alloca swifterror %swift_error*, align 8
   store %swift_error* null, %swift_error** %error_ptr_ref
   call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i8* swiftself null, %swift_error** nocapture swifterror %error_ptr_ref)
@@ -610,11 +1966,54 @@ declare void @acallee(i8*)
 
 ; Make sure we don't tail call if the caller returns a swifterror value. We
 ; would have to move into the swifterror register before the tail call.
-; CHECK-APPLE: tailcall_from_swifterror:
-; CHECK-APPLE-NOT: b _acallee
-; CHECK-APPLE: bl _acallee
 
 define swiftcc void @tailcall_from_swifterror(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-APPLE-LABEL: tailcall_from_swifterror:
+; CHECK-APPLE:       ; %bb.0: ; %entry
+; CHECK-APPLE-NEXT:    str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    add x29, sp, #16 ; =16
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    .cfi_offset w19, -32
+; CHECK-APPLE-NEXT:    mov x19, x21
+; CHECK-APPLE-NEXT:    mov x0, xzr
+; CHECK-APPLE-NEXT:    bl _acallee
+; CHECK-APPLE-NEXT:    mov x21, x19
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: tailcall_from_swifterror:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #32 ; =32
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #16 ; =16
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    mov x0, xzr
+; CHECK-O0-AARCH64-NEXT:    bl _acallee
+; CHECK-O0-AARCH64-NEXT:    ldr x21, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #32 ; =32
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: tailcall_from_swifterror:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #32 ; =32
+; CHECK-O0-ARM64_32-NEXT:    str x30, [sp, #16] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -16
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp, #8] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    mov x0, xzr
+; CHECK-O0-ARM64_32-NEXT:    bl _acallee
+; CHECK-O0-ARM64_32-NEXT:    ldr x21, [sp, #8] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldr x30, [sp, #16] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #32 ; =32
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   tail call void @acallee(i8* null)
   ret void
@@ -623,18 +2022,71 @@ entry:
 declare swiftcc void @foo2(%swift_error** swifterror)
 
 ; Make sure we properly assign registers during fast-isel.
-; CHECK-O0-LABEL: testAssign
-; CHECK-O0: mov     x21, xzr
-; CHECK-O0: bl      _foo2
-; CHECK-O0: str     x21, [s[[STK:.*]]]
-; CHECK-O0: ldr x{{[0-9]+}}, [s[[STK]]]
 
-; CHECK-APPLE-LABEL: testAssign
-; CHECK-APPLE: mov      x21, xzr
-; CHECK-APPLE: bl      _foo2
-; CHECK-APPLE: mov      x0, x21
 
 define swiftcc %swift_error* @testAssign(i8* %error_ref) {
+; CHECK-APPLE-LABEL: testAssign:
+; CHECK-APPLE:       ; %bb.0: ; %entry
+; CHECK-APPLE-NEXT:    sub sp, sp, #48 ; =48
+; CHECK-APPLE-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    add x29, sp, #32 ; =32
+; CHECK-APPLE-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    .cfi_offset w21, -24
+; CHECK-APPLE-NEXT:    .cfi_offset w22, -32
+; CHECK-APPLE-NEXT:    mov x21, xzr
+; CHECK-APPLE-NEXT:    bl _foo2
+; CHECK-APPLE-NEXT:    mov x0, x21
+; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    ldp x22, x21, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT:    add sp, sp, #48 ; =48
+; CHECK-APPLE-NEXT:    ret
+;
+; CHECK-O0-AARCH64-LABEL: testAssign:
+; CHECK-O0-AARCH64:       ; %bb.0: ; %entry
+; CHECK-O0-AARCH64-NEXT:    sub sp, sp, #48 ; =48
+; CHECK-O0-AARCH64-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:    add x29, sp, #32 ; =32
+; CHECK-O0-AARCH64-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-AARCH64-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-AARCH64-NEXT:    ; implicit-def: $x1
+; CHECK-O0-AARCH64-NEXT:    mov x21, xzr
+; CHECK-O0-AARCH64-NEXT:    bl _foo2
+; CHECK-O0-AARCH64-NEXT:    str x21, [sp] ; 8-byte Folded Spill
+; CHECK-O0-AARCH64-NEXT:  ; %bb.1: ; %a
+; CHECK-O0-AARCH64-NEXT:    ldr x0, [sp] ; 8-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    ldp x22, x21, [sp, #16] ; 16-byte Folded Reload
+; CHECK-O0-AARCH64-NEXT:    add sp, sp, #48 ; =48
+; CHECK-O0-AARCH64-NEXT:    ret
+;
+; CHECK-O0-ARM64_32-LABEL: testAssign:
+; CHECK-O0-ARM64_32:       ; %bb.0: ; %entry
+; CHECK-O0-ARM64_32-NEXT:    sub sp, sp, #48 ; =48
+; CHECK-O0-ARM64_32-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w30, -8
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w29, -16
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w21, -24
+; CHECK-O0-ARM64_32-NEXT:    .cfi_offset w22, -32
+; CHECK-O0-ARM64_32-NEXT:    ; implicit-def: $x1
+; CHECK-O0-ARM64_32-NEXT:    mov x21, xzr
+; CHECK-O0-ARM64_32-NEXT:    bl _foo2
+; CHECK-O0-ARM64_32-NEXT:    str x21, [sp] ; 8-byte Folded Spill
+; CHECK-O0-ARM64_32-NEXT:  ; %bb.1: ; %a
+; CHECK-O0-ARM64_32-NEXT:    ldr x8, [sp] ; 8-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    and x0, x8, #0xffffffff
+; CHECK-O0-ARM64_32-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    ldp x22, x21, [sp, #16] ; 16-byte Folded Reload
+; CHECK-O0-ARM64_32-NEXT:    add sp, sp, #48 ; =48
+; CHECK-O0-ARM64_32-NEXT:    ret
 entry:
   %error_ptr = alloca swifterror %swift_error*
   store %swift_error* null, %swift_error** %error_ptr

diff  --git a/llvm/test/CodeGen/ARM/ifcvt-iter-indbr.ll b/llvm/test/CodeGen/ARM/ifcvt-iter-indbr.ll
index a39a8e8da00cc..72a3eb850cdc7 100644
--- a/llvm/test/CodeGen/ARM/ifcvt-iter-indbr.ll
+++ b/llvm/test/CodeGen/ARM/ifcvt-iter-indbr.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple thumbv7s-apple-darwin  -asm-verbose=false | FileCheck %s
 ; RUN: llc < %s -mtriple thumbv7s-apple-darwin  -asm-verbose=false -stop-after=if-converter | FileCheck --check-prefix=CHECK-PROB %s
 
@@ -8,34 +9,62 @@ declare i8* @bar(i32, i8*, i8*)
 ; (predicated) indirectbr terminator.
 ; If we do, we would ignore its fallthrough successor.
 
-
-; CHECK-LABEL: test:
-; CHECK:       cmp {{.*}}, #21
-; CHECK-NEXT:  itt eq
-; CHECK-NEXT:  streq.w
-; CHECK-NEXT:  moveq pc
-; CHECK-NEXT: LBB{{[0-9_]+}}:
-; CHECK-NEXT:  cmp {{.*}}, #42
-; CHECK-NEXT:  beq [[CALL_FOO_1234:LBB[0-9_]+]]
-; CHECK-NEXT:  ldr {{.*}}[sp
-; CHECK-NEXT:  str
-; CHECK-NEXT:  mov pc
-; CHECK-NEXT: Ltmp
-; CHECK-NEXT: [[CALL_FOO_1234]]:
-; CHECK-NEXT:  movw r0, #1234
-; CHECK-NEXT:  b [[FOOCALL:LBB[0-9_]+]]
-; CHECK-NEXT: Ltmp
-; CHECK-NEXT: LBB{{[0-9_]+}}:
-; CHECK-NEXT:  movw r0, #4567
-; CHECK-NEXT: [[FOOCALL]]:
-; CHECK-NEXT:  bl _foo
-;
 ; CHECK-PROB: bb.0{{[0-9a-zA-Z.]*}}:
 ; CHECK-PROB: successors: %bb.1(0x40000000), %bb.3(0x20000000), %bb.5(0x20000000)
 ; CHECK-PROB: bb.2{{[0-9a-zA-Z.]*}}:
 ; CHECK-PROB: successors: %bb.3(0x40000000), %bb.5(0x40000000)
 
 define i32 @test(i32 %a, i32 %a2, i32* %p, i32* %p2) "frame-pointer"="all" {
+; CHECK-LABEL: test:
+; CHECK:         push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    add r7, sp, #12
+; CHECK-NEXT:    push.w {r8, r10, r11}
+; CHECK-NEXT:    str r3, [sp, #-4]!
+; CHECK-NEXT:    mov r11, r2
+; CHECK-NEXT:    mov r4, r1
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    ldr r6, LCPI0_0
+; CHECK-NEXT:  LPC0_1:
+; CHECK-NEXT:    add r6, pc
+; CHECK-NEXT:    ldr.w r8, LCPI0_1
+; CHECK-NEXT:  LPC0_0:
+; CHECK-NEXT:    add r8, pc
+; CHECK-NEXT:    movs r0, #1
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    bl _bar
+; CHECK-NEXT:    mov r10, r0
+; CHECK-NEXT:    movs r0, #2
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    bl _bar
+; CHECK-NEXT:    movs r0, #3
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    bl _bar
+; CHECK-NEXT:    cmp r5, #21
+; CHECK-NEXT:    itt eq
+; CHECK-NEXT:    streq.w r5, [r11]
+; CHECK-NEXT:    moveq pc, r0
+; CHECK-NEXT:  LBB0_1:
+; CHECK-NEXT:    cmp r4, #42
+; CHECK-NEXT:    beq LBB0_3
+; CHECK-NEXT:    ldr r0, [sp]
+; CHECK-NEXT:    str r5, [r0]
+; CHECK-NEXT:    mov pc, r10
+; CHECK-NEXT:  Ltmp0:
+; CHECK-NEXT:  LBB0_3:
+; CHECK-NEXT:    movw r0, #1234
+; CHECK-NEXT:    b LBB0_5
+; CHECK-NEXT:  Ltmp1:
+; CHECK-NEXT:  LBB0_4:
+; CHECK-NEXT:    movw r0, #4567
+; CHECK-NEXT:  LBB0_5:
+; CHECK-NEXT:    bl _foo
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r8, r10, r11}
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    .p2align 2
 entry:
   %dst1 = call i8* @bar(i32 1, i8* blockaddress(@test, %bb1), i8* blockaddress(@test, %bb2))
   %dst2 = call i8* @bar(i32 2, i8* blockaddress(@test, %bb1), i8* blockaddress(@test, %bb2))

diff  --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py
index 8b1c28117146c..8ef09996d4ab1 100644
--- a/llvm/utils/UpdateTestChecks/asm.py
+++ b/llvm/utils/UpdateTestChecks/asm.py
@@ -141,6 +141,12 @@ class string:
      r'[ \t]*\.cfi_endproc\n',
      flags=(re.M | re.S))
 
+ASM_FUNCTION_THUMB_DARWIN_RE = re.compile(
+     r'^_(?P<func>[^:]+):\n'
+     r'(?P<body>.*?)\n'
+     r'[ \t]*\.data_region\n',
+     flags=(re.M | re.S))
+
 ASM_FUNCTION_ARM_IOS_RE = re.compile(
      r'^_(?P<func>[^:]+):\n'
      r'(?P<body>.*?)'
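
For reference: the new ASM_FUNCTION_THUMB_DARWIN_RE keys on the leading
underscore that Mach-O prepends to symbol names, and stops at the
".data_region" directive the Darwin assembler emits after the function body.
A minimal sketch of how the pattern slices a function out of llc output; the
asm snippet here is invented purely for illustration:

    import re

    # Same pattern as in the patch above; the sample asm is made up.
    ASM_FUNCTION_THUMB_DARWIN_RE = re.compile(
        r'^_(?P<func>[^:]+):\n'
        r'(?P<body>.*?)\n'
        r'[ \t]*\.data_region\n',
        flags=(re.M | re.S))

    asm = ('_test:\n'
           '\tpush {r4, r5, r6, r7, lr}\n'
           '\tpop {r4, r5, r6, r7, pc}\n'
           '\t.data_region\n')

    m = ASM_FUNCTION_THUMB_DARWIN_RE.search(asm)
    print(m.group('func'))  # prints: test
    print(m.group('body'))  # prints the push/pop instruction lines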
@@ -360,8 +366,10 @@ def get_run_handler(triple):
       'i686': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
       'x86': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
       'i386': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
+      'arm64_32-apple-ios': (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_DARWIN_RE),
       'aarch64': (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_RE),
       'aarch64-apple-darwin': (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_DARWIN_RE),
+      'aarch64-apple-ios': (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_DARWIN_RE),
       'hexagon': (scrub_asm_hexagon, ASM_FUNCTION_HEXAGON_RE),
       'r600': (scrub_asm_amdgpu, ASM_FUNCTION_AMDGPU_RE),
       'amdgcn': (scrub_asm_amdgpu, ASM_FUNCTION_AMDGPU_RE),
@@ -374,6 +382,7 @@ def get_run_handler(triple):
       'thumb': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
       'thumb-macho': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_MACHO_RE),
       'thumbv5-macho': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_MACHO_RE),
+      'thumbv7s-apple-darwin' : (scrub_asm_arm_eabi, ASM_FUNCTION_THUMB_DARWIN_RE),
       'thumbv7-apple-ios' : (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_IOS_RE),
       'm68k': (scrub_asm_m68k, ASM_FUNCTION_M68K_RE),
       'mips': (scrub_asm_mips, ASM_FUNCTION_MIPS_RE),
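
With handlers registered for arm64_32-apple-ios, aarch64-apple-ios and
thumbv7s-apple-darwin, update_llc_test_checks.py can now autogenerate
assertions for tests using those triples. Regenerating one of the tests above
looks roughly like this (the build directory path is just an example):

    python3 llvm/utils/update_llc_test_checks.py \
        --llc-binary build/bin/llc \
        llvm/test/CodeGen/AArch64/swifterror.ll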


        

