[llvm] r363265 - [FIX] Forces shrink wrapping to consider any memory access as aliasing with the stack

Diogo N. Sampaio via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 13 06:56:19 PDT 2019


Author: dnsampaio
Date: Thu Jun 13 06:56:19 2019
New Revision: 363265

URL: http://llvm.org/viewvc/llvm-project?rev=363265&view=rev
Log:
[FIX] Forces shrink wrapping to consider any memory access as aliasing with the stack

Summary:
Related bug: https://bugs.llvm.org/show_bug.cgi?id=37472

The shrink wrapping pass prematurely restores the stack, at a point where the stack might still be accessed.
Taking an exception can cause the stack to be corrupted.

As a first approach, this patch is overly conservative, assuming that any instruction that may load or store could access
the stack.

Reviewers: dmgreen, qcolombet

Reviewed By: qcolombet

Subscribers: simpal01, efriedma, eli.friedman, javed.absar, llvm-commits, eugenis, chill, carwil, thegameg

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63152


Added:
    llvm/trunk/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir
Modified:
    llvm/trunk/lib/CodeGen/ShrinkWrap.cpp
    llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
    llvm/trunk/test/CodeGen/AArch64/branch-relax-cbz.ll
    llvm/trunk/test/CodeGen/AArch64/taildup-cfi.ll
    llvm/trunk/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
    llvm/trunk/test/CodeGen/ARM/arm-shrink-wrapping.ll
    llvm/trunk/test/CodeGen/PowerPC/BreakableToken-reduced.ll
    llvm/trunk/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll
    llvm/trunk/test/CodeGen/PowerPC/licm-tocReg.ll
    llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
    llvm/trunk/test/CodeGen/PowerPC/xray-ret-is-terminator.ll
    llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
    llvm/trunk/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
    llvm/trunk/test/CodeGen/X86/MachineSink-eflags.ll
    llvm/trunk/test/CodeGen/X86/cmov.ll
    llvm/trunk/test/CodeGen/X86/copy-eflags.ll
    llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-2.ll
    llvm/trunk/test/CodeGen/X86/i386-shrink-wrapping.ll
    llvm/trunk/test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll
    llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll
    llvm/trunk/test/CodeGen/X86/xchg-nofold.ll

Modified: llvm/trunk/lib/CodeGen/ShrinkWrap.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ShrinkWrap.cpp?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ShrinkWrap.cpp (original)
+++ llvm/trunk/lib/CodeGen/ShrinkWrap.cpp Thu Jun 13 06:56:19 2019
@@ -258,6 +258,15 @@ INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TY
 
 bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
                                  RegScavenger *RS) const {
+  // This prevents premature stack popping when an indirect stack access
+  // occurs. It is overly aggressive for the moment.
+  // TODO: - Obvious non-stack loads and store, such as global values,
+  //         are known to not access the stack.
+  //       - Further, data dependency and alias analysis can validate
+  //         that load and stores never derive from the stack pointer.
+  if (MI.mayLoadOrStore())
+    return true;
+
   if (MI.getOpcode() == FrameSetupOpcode ||
       MI.getOpcode() == FrameDestroyOpcode) {
     LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n');

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll Thu Jun 13 06:56:19 2019
@@ -1,45 +1,51 @@
-; RUN: llc %s -o - -enable-shrink-wrap=true -disable-post-ra -frame-pointer=all | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
-; RUN: llc %s -o - -enable-shrink-wrap=false -disable-post-ra -frame-pointer=all | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -mtriple=arm64-apple-ios -enable-shrink-wrap=true -disable-post-ra -frame-pointer=all | FileCheck %s --check-prefix=ENABLE
+; RUN: llc %s -o - -enable-shrink-wrap=false -disable-post-ra -frame-pointer=all | FileCheck %s --check-prefix=DISABLE
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-ios"
 
 
 ; Initial motivating example: Simple diamond with a call just on one side.
-; CHECK-LABEL: foo:
-;
-; Compare the arguments and jump to exit.
-; No prologue needed.
-; ENABLE: cmp w0, w1
-; ENABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; CHECK: sub sp, sp, #32
-; CHECK-NEXT: stp [[SAVE_SP:x[0-9]+]], [[CSR:x[0-9]+]], [sp, #16]
-; CHECK-NEXT: add [[SAVE_SP]], sp, #16
-;
-; Compare the arguments and jump to exit.
-; After the prologue is set.
-; DISABLE: cmp w0, w1
-; DISABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]]
-;
-; Store %a in the alloca.
-; CHECK: stur w0, {{\[}}[[SAVE_SP]], #-4]
-; Set the alloca address in the second argument.
-; CHECK-NEXT: sub x1, [[SAVE_SP]], #4
-; Set the first argument to zero.
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: bl _doSomething
-;
-; Without shrink-wrapping, epilogue is in the exit block.
-; DISABLE: [[EXIT_LABEL]]:
-; Epilogue code.
-; CHECK-NEXT: ldp x{{[0-9]+}}, [[CSR]], [sp, #16]
-; CHECK-NEXT: add sp, sp, #32
-;
-; With shrink-wrapping, exit block is a simple return.
-; ENABLE: [[EXIT_LABEL]]:
-; CHECK-NEXT: ret
 define i32 @foo(i32 %a, i32 %b) {
+; ENABLE-LABEL: foo:
+; ENABLE:       ; %bb.0:
+; ENABLE-NEXT:    cmp w0, w1
+; ENABLE-NEXT:    b.ge LBB0_2
+; ENABLE-NEXT:  ; %bb.1: ; %true
+; ENABLE-NEXT:    sub sp, sp, #32 ; =32
+; ENABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; ENABLE-NEXT:    add x29, sp, #16 ; =16
+; ENABLE-NEXT:    .cfi_def_cfa w29, 16
+; ENABLE-NEXT:    .cfi_offset w30, -8
+; ENABLE-NEXT:    .cfi_offset w29, -16
+; ENABLE-NEXT:    stur w0, [x29, #-4]
+; ENABLE-NEXT:    sub x1, x29, #4 ; =4
+; ENABLE-NEXT:    mov w0, wzr
+; ENABLE-NEXT:    bl _doSomething
+; ENABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; ENABLE-NEXT:    add sp, sp, #32 ; =32
+; ENABLE-NEXT:  LBB0_2: ; %false
+; ENABLE-NEXT:    ret
+;
+; DISABLE-LABEL: foo:
+; DISABLE:       ; %bb.0:
+; DISABLE-NEXT:    sub sp, sp, #32 ; =32
+; DISABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; DISABLE-NEXT:    add x29, sp, #16 ; =16
+; DISABLE-NEXT:    .cfi_def_cfa w29, 16
+; DISABLE-NEXT:    .cfi_offset w30, -8
+; DISABLE-NEXT:    .cfi_offset w29, -16
+; DISABLE-NEXT:    cmp w0, w1
+; DISABLE-NEXT:    b.ge LBB0_2
+; DISABLE-NEXT:  ; %bb.1: ; %true
+; DISABLE-NEXT:    stur w0, [x29, #-4]
+; DISABLE-NEXT:    sub x1, x29, #4 ; =4
+; DISABLE-NEXT:    mov w0, wzr
+; DISABLE-NEXT:    bl _doSomething
+; DISABLE-NEXT:  LBB0_2: ; %false
+; DISABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; DISABLE-NEXT:    add sp, sp, #32 ; =32
+; DISABLE-NEXT:    ret
   %tmp = alloca i32, align 4
   %tmp2 = icmp slt i32 %a, %b
   br i1 %tmp2, label %true, label %false
@@ -60,50 +66,65 @@ declare i32 @doSomething(i32, i32*)
 
 ; Check that we do not perform the restore inside the loop whereas the save
 ; is outside.
-; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
-;
-; Shrink-wrapping allows to skip the prologue in the else case.
-; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]!
-; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
-; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
-;
-; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; CHECK: mov [[SUM:w[0-9]+]], wzr
-; CHECK-NEXT: mov [[IV:w[0-9]+]], #10
-;
-; Next BB.
-; CHECK: [[LOOP:LBB[0-9_]+]]: ; %for.body
-; CHECK: bl _something
-; CHECK-NEXT: subs [[IV]], [[IV]], #1
-; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
-; CHECK-NEXT: b.ne [[LOOP]]
-;
-; Next BB.
-; Copy SUM into the returned register + << 3.
-; CHECK: lsl w0, [[SUM]], #3
-;
-; Jump to epilogue.
-; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]]
-;
-; DISABLE: [[ELSE_LABEL]]: ; %if.else
-; Shift second argument by one and store into returned register.
-; DISABLE: lsl w0, w1, #1
-; DISABLE: [[EPILOG_BB]]: ; %if.end
-;
-; Epilogue code.
-; CHECK: ldp [[CSR3]], [[CSR4]], [sp, #16]
-; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32
-; CHECK-NEXT: ret
-;
-; ENABLE: [[ELSE_LABEL]]: ; %if.else
-; Shift second argument by one and store into returned register.
-; ENABLE: lsl w0, w1, #1
-; ENABLE: ret
 define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
+; ENABLE-LABEL: freqSaveAndRestoreOutsideLoop:
+; ENABLE:       ; %bb.0: ; %entry
+; ENABLE-NEXT:    cbz w0, LBB1_4
+; ENABLE-NEXT:  ; %bb.1: ; %for.body.preheader
+; ENABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; ENABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; ENABLE-NEXT:    add x29, sp, #16 ; =16
+; ENABLE-NEXT:    .cfi_def_cfa w29, 16
+; ENABLE-NEXT:    .cfi_offset w30, -8
+; ENABLE-NEXT:    .cfi_offset w29, -16
+; ENABLE-NEXT:    .cfi_offset w19, -24
+; ENABLE-NEXT:    .cfi_offset w20, -32
+; ENABLE-NEXT:    mov w19, wzr
+; ENABLE-NEXT:    mov w20, #10
+; ENABLE-NEXT:  LBB1_2: ; %for.body
+; ENABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    bl _something
+; ENABLE-NEXT:    subs w20, w20, #1 ; =1
+; ENABLE-NEXT:    add w19, w0, w19
+; ENABLE-NEXT:    b.ne LBB1_2
+; ENABLE-NEXT:  ; %bb.3: ; %for.end
+; ENABLE-NEXT:    lsl w0, w19, #3
+; ENABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; ENABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; ENABLE-NEXT:    ret
+; ENABLE-NEXT:  LBB1_4: ; %if.else
+; ENABLE-NEXT:    lsl w0, w1, #1
+; ENABLE-NEXT:    ret
+;
+; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop:
+; DISABLE:       ; %bb.0: ; %entry
+; DISABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; DISABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; DISABLE-NEXT:    add x29, sp, #16 ; =16
+; DISABLE-NEXT:    .cfi_def_cfa w29, 16
+; DISABLE-NEXT:    .cfi_offset w30, -8
+; DISABLE-NEXT:    .cfi_offset w29, -16
+; DISABLE-NEXT:    .cfi_offset w19, -24
+; DISABLE-NEXT:    .cfi_offset w20, -32
+; DISABLE-NEXT:    cbz w0, LBB1_4
+; DISABLE-NEXT:  ; %bb.1: ; %for.body.preheader
+; DISABLE-NEXT:    mov w19, wzr
+; DISABLE-NEXT:    mov w20, #10
+; DISABLE-NEXT:  LBB1_2: ; %for.body
+; DISABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    bl _something
+; DISABLE-NEXT:    subs w20, w20, #1 ; =1
+; DISABLE-NEXT:    add w19, w0, w19
+; DISABLE-NEXT:    b.ne LBB1_2
+; DISABLE-NEXT:  ; %bb.3: ; %for.end
+; DISABLE-NEXT:    lsl w0, w19, #3
+; DISABLE-NEXT:    b LBB1_5
+; DISABLE-NEXT:  LBB1_4: ; %if.else
+; DISABLE-NEXT:    lsl w0, w1, #1
+; DISABLE-NEXT:  LBB1_5: ; %if.end
+; DISABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; DISABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; DISABLE-NEXT:    ret
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %for.body
@@ -134,26 +155,54 @@ declare i32 @something(...)
 
 ; Check that we do not perform the shrink-wrapping inside the loop even
 ; though that would be legal. The cost model must prevent that.
-; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
-; Prologue code.
-; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]!
-; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
-; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
-; CHECK: mov [[SUM:w[0-9]+]], wzr
-; CHECK-NEXT: mov [[IV:w[0-9]+]], #10
-; Next BB.
-; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
-; CHECK: bl _something
-; CHECK-NEXT: subs [[IV]], [[IV]], #1
-; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
-; CHECK-NEXT: b.ne [[LOOP_LABEL]]
-; Next BB.
-; CHECK: ; %for.end
-; CHECK: mov w0, [[SUM]]
-; CHECK-NEXT: ldp [[CSR3]], [[CSR4]], [sp, #16]
-; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32
-; CHECK-NEXT: ret
 define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
+; ENABLE-LABEL: freqSaveAndRestoreOutsideLoop2:
+; ENABLE:       ; %bb.0: ; %entry
+; ENABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; ENABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; ENABLE-NEXT:    add x29, sp, #16 ; =16
+; ENABLE-NEXT:    .cfi_def_cfa w29, 16
+; ENABLE-NEXT:    .cfi_offset w30, -8
+; ENABLE-NEXT:    .cfi_offset w29, -16
+; ENABLE-NEXT:    .cfi_offset w19, -24
+; ENABLE-NEXT:    .cfi_offset w20, -32
+; ENABLE-NEXT:    mov w19, wzr
+; ENABLE-NEXT:    mov w20, #10
+; ENABLE-NEXT:  LBB2_1: ; %for.body
+; ENABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    bl _something
+; ENABLE-NEXT:    subs w20, w20, #1 ; =1
+; ENABLE-NEXT:    add w19, w0, w19
+; ENABLE-NEXT:    b.ne LBB2_1
+; ENABLE-NEXT:  ; %bb.2: ; %for.end
+; ENABLE-NEXT:    mov w0, w19
+; ENABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; ENABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; ENABLE-NEXT:    ret
+;
+; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop2:
+; DISABLE:       ; %bb.0: ; %entry
+; DISABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; DISABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; DISABLE-NEXT:    add x29, sp, #16 ; =16
+; DISABLE-NEXT:    .cfi_def_cfa w29, 16
+; DISABLE-NEXT:    .cfi_offset w30, -8
+; DISABLE-NEXT:    .cfi_offset w29, -16
+; DISABLE-NEXT:    .cfi_offset w19, -24
+; DISABLE-NEXT:    .cfi_offset w20, -32
+; DISABLE-NEXT:    mov w19, wzr
+; DISABLE-NEXT:    mov w20, #10
+; DISABLE-NEXT:  LBB2_1: ; %for.body
+; DISABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    bl _something
+; DISABLE-NEXT:    subs w20, w20, #1 ; =1
+; DISABLE-NEXT:    add w19, w0, w19
+; DISABLE-NEXT:    b.ne LBB2_1
+; DISABLE-NEXT:  ; %bb.2: ; %for.end
+; DISABLE-NEXT:    mov w0, w19
+; DISABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; DISABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; DISABLE-NEXT:    ret
 entry:
   br label %for.body
 
@@ -172,46 +221,67 @@ for.end:
 
 ; Check with a more complex case that we do not have save within the loop and
 ; restore outside.
-; CHECK-LABEL: loopInfoSaveOutsideLoop:
-;
-; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]!
-; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
-; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
-;
-; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; CHECK: mov [[SUM:w[0-9]+]], wzr
-; CHECK-NEXT: mov [[IV:w[0-9]+]], #10
-;
-; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
-; CHECK: bl _something
-; CHECK-NEXT: subs [[IV]], [[IV]], #1
-; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
-; CHECK-NEXT: b.ne [[LOOP_LABEL]]
-; Next BB.
-; CHECK: bl _somethingElse
-; CHECK-NEXT: lsl w0, [[SUM]], #3
-;
-; Jump to epilogue.
-; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]]
-;
-; DISABLE: [[ELSE_LABEL]]: ; %if.else
-; Shift second argument by one and store into returned register.
-; DISABLE: lsl w0, w1, #1
-; DISABLE: [[EPILOG_BB]]: ; %if.end
-; Epilogue code.
-; CHECK-NEXT: ldp [[CSR3]], [[CSR4]], [sp, #16]
-; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32
-; CHECK-NEXT: ret
-;
-; ENABLE: [[ELSE_LABEL]]: ; %if.else
-; Shift second argument by one and store into returned register.
-; ENABLE: lsl w0, w1, #1
-; ENABLE: ret
 define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
+; ENABLE-LABEL: loopInfoSaveOutsideLoop:
+; ENABLE:       ; %bb.0: ; %entry
+; ENABLE-NEXT:    cbz w0, LBB3_4
+; ENABLE-NEXT:  ; %bb.1: ; %for.body.preheader
+; ENABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; ENABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; ENABLE-NEXT:    add x29, sp, #16 ; =16
+; ENABLE-NEXT:    .cfi_def_cfa w29, 16
+; ENABLE-NEXT:    .cfi_offset w30, -8
+; ENABLE-NEXT:    .cfi_offset w29, -16
+; ENABLE-NEXT:    .cfi_offset w19, -24
+; ENABLE-NEXT:    .cfi_offset w20, -32
+; ENABLE-NEXT:    mov w19, wzr
+; ENABLE-NEXT:    mov w20, #10
+; ENABLE-NEXT:  LBB3_2: ; %for.body
+; ENABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    bl _something
+; ENABLE-NEXT:    subs w20, w20, #1 ; =1
+; ENABLE-NEXT:    add w19, w0, w19
+; ENABLE-NEXT:    b.ne LBB3_2
+; ENABLE-NEXT:  ; %bb.3: ; %for.end
+; ENABLE-NEXT:    bl _somethingElse
+; ENABLE-NEXT:    lsl w0, w19, #3
+; ENABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; ENABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; ENABLE-NEXT:    ret
+; ENABLE-NEXT:  LBB3_4: ; %if.else
+; ENABLE-NEXT:    lsl w0, w1, #1
+; ENABLE-NEXT:    ret
+;
+; DISABLE-LABEL: loopInfoSaveOutsideLoop:
+; DISABLE:       ; %bb.0: ; %entry
+; DISABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; DISABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; DISABLE-NEXT:    add x29, sp, #16 ; =16
+; DISABLE-NEXT:    .cfi_def_cfa w29, 16
+; DISABLE-NEXT:    .cfi_offset w30, -8
+; DISABLE-NEXT:    .cfi_offset w29, -16
+; DISABLE-NEXT:    .cfi_offset w19, -24
+; DISABLE-NEXT:    .cfi_offset w20, -32
+; DISABLE-NEXT:    cbz w0, LBB3_4
+; DISABLE-NEXT:  ; %bb.1: ; %for.body.preheader
+; DISABLE-NEXT:    mov w19, wzr
+; DISABLE-NEXT:    mov w20, #10
+; DISABLE-NEXT:  LBB3_2: ; %for.body
+; DISABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    bl _something
+; DISABLE-NEXT:    subs w20, w20, #1 ; =1
+; DISABLE-NEXT:    add w19, w0, w19
+; DISABLE-NEXT:    b.ne LBB3_2
+; DISABLE-NEXT:  ; %bb.3: ; %for.end
+; DISABLE-NEXT:    bl _somethingElse
+; DISABLE-NEXT:    lsl w0, w19, #3
+; DISABLE-NEXT:    b LBB3_5
+; DISABLE-NEXT:  LBB3_4: ; %if.else
+; DISABLE-NEXT:    lsl w0, w1, #1
+; DISABLE-NEXT:  LBB3_5: ; %if.end
+; DISABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; DISABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; DISABLE-NEXT:    ret
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %for.body
@@ -243,45 +313,57 @@ declare void @somethingElse(...)
 
 ; Check with a more complex case that we do not have restore within the loop and
 ; save outside.
-; CHECK-LABEL: loopInfoRestoreOutsideLoop:
-;
-; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]!
-; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
-; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
-;
-; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; CHECK: bl _somethingElse
-; CHECK-NEXT: mov [[SUM:w[0-9]+]], wzr
-; CHECK-NEXT: mov [[IV:w[0-9]+]], #10
-;
-; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
-; CHECK: bl _something
-; CHECK-NEXT: subs [[IV]], [[IV]], #1
-; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
-; CHECK-NEXT: b.ne [[LOOP_LABEL]]
-; Next BB.
-; CHECK: lsl w0, [[SUM]], #3
-;
-; Jump to epilogue.
-; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]]
-;
-; DISABLE: [[ELSE_LABEL]]: ; %if.else
-; Shift second argument by one and store into returned register.
-; DISABLE: lsl w0, w1, #1
-; DISABLE: [[EPILOG_BB]]: ; %if.end
-; Epilogue code.
-; CHECK: ldp [[CSR3]], [[CSR4]], [sp, #16]
-; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32
-; CHECK-NEXT: ret
-;
-; ENABLE: [[ELSE_LABEL]]: ; %if.else
-; Shift second argument by one and store into returned register.
-; ENABLE: lsl w0, w1, #1
-; ENABLE: ret
 define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind {
+; ENABLE-LABEL: loopInfoRestoreOutsideLoop:
+; ENABLE:       ; %bb.0: ; %entry
+; ENABLE-NEXT:    cbz w0, LBB4_4
+; ENABLE-NEXT:  ; %bb.1: ; %if.then
+; ENABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; ENABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; ENABLE-NEXT:    add x29, sp, #16 ; =16
+; ENABLE-NEXT:    bl _somethingElse
+; ENABLE-NEXT:    mov w19, wzr
+; ENABLE-NEXT:    mov w20, #10
+; ENABLE-NEXT:  LBB4_2: ; %for.body
+; ENABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    bl _something
+; ENABLE-NEXT:    subs w20, w20, #1 ; =1
+; ENABLE-NEXT:    add w19, w0, w19
+; ENABLE-NEXT:    b.ne LBB4_2
+; ENABLE-NEXT:  ; %bb.3: ; %for.end
+; ENABLE-NEXT:    lsl w0, w19, #3
+; ENABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; ENABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; ENABLE-NEXT:    ret
+; ENABLE-NEXT:  LBB4_4: ; %if.else
+; ENABLE-NEXT:    lsl w0, w1, #1
+; ENABLE-NEXT:    ret
+;
+; DISABLE-LABEL: loopInfoRestoreOutsideLoop:
+; DISABLE:       ; %bb.0: ; %entry
+; DISABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; DISABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; DISABLE-NEXT:    add x29, sp, #16 ; =16
+; DISABLE-NEXT:    cbz w0, LBB4_4
+; DISABLE-NEXT:  ; %bb.1: ; %if.then
+; DISABLE-NEXT:    bl _somethingElse
+; DISABLE-NEXT:    mov w19, wzr
+; DISABLE-NEXT:    mov w20, #10
+; DISABLE-NEXT:  LBB4_2: ; %for.body
+; DISABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    bl _something
+; DISABLE-NEXT:    subs w20, w20, #1 ; =1
+; DISABLE-NEXT:    add w19, w0, w19
+; DISABLE-NEXT:    b.ne LBB4_2
+; DISABLE-NEXT:  ; %bb.3: ; %for.end
+; DISABLE-NEXT:    lsl w0, w19, #3
+; DISABLE-NEXT:    b LBB4_5
+; DISABLE-NEXT:  LBB4_4: ; %if.else
+; DISABLE-NEXT:    lsl w0, w1, #1
+; DISABLE-NEXT:  LBB4_5: ; %if.end
+; DISABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; DISABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; DISABLE-NEXT:    ret
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %if.then
@@ -313,49 +395,74 @@ if.end:
 }
 
 ; Check that we handle function with no frame information correctly.
-; CHECK-LABEL: emptyFrame:
-; CHECK: ; %entry
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
 define i32 @emptyFrame() {
+; ENABLE-LABEL: emptyFrame:
+; ENABLE:       ; %bb.0: ; %entry
+; ENABLE-NEXT:    mov w0, wzr
+; ENABLE-NEXT:    ret
+;
+; DISABLE-LABEL: emptyFrame:
+; DISABLE:       ; %bb.0: ; %entry
+; DISABLE-NEXT:    mov w0, wzr
+; DISABLE-NEXT:    ret
 entry:
   ret i32 0
 }
 
 ; Check that we handle variadic function correctly.
-; CHECK-LABEL: variadicFunc:
-;
-; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; CHECK: sub sp, sp, #16
-; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Sum is merged with the returned register.
-; CHECK: add [[VA_BASE:x[0-9]+]], sp, #16
-; CHECK-NEXT: cmp w1, #1
-; CHECK-NEXT: str [[VA_BASE]], [sp, #8]
-; CHECK-NEXT: mov [[SUM:w0]], wzr
-; CHECK-NEXT: b.lt [[IFEND_LABEL:LBB[0-9_]+]]
-;
-; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
-; CHECK: ldr [[VA_ADDR:x[0-9]+]], [sp, #8]
-; CHECK-NEXT: add [[NEXT_VA_ADDR:x[0-9]+]], [[VA_ADDR]], #8
-; CHECK-NEXT: str [[NEXT_VA_ADDR]], [sp, #8]
-; CHECK-NEXT: ldr [[VA_VAL:w[0-9]+]], {{\[}}[[VA_ADDR]]]
-; CHECK-NEXT: subs w1, w1, #1
-; CHECK-NEXT: add [[SUM]], [[SUM]], [[VA_VAL]]
-; CHECK-NEXT: b.ne [[LOOP_LABEL]]
-; CHECK-NEXT: [[IFEND_LABEL]]:
-; Epilogue code.
-; CHECK: add sp, sp, #16
-; CHECK-NEXT: ret
-;
-; CHECK: [[ELSE_LABEL]]: ; %if.else
-; CHECK-NEXT: lsl w0, w1, #1
-; DISABLE-NEXT: add sp, sp, #16
-; CHECK-NEXT: ret
 define i32 @variadicFunc(i32 %cond, i32 %count, ...) nounwind {
+; ENABLE-LABEL: variadicFunc:
+; ENABLE:       ; %bb.0: ; %entry
+; ENABLE-NEXT:    cbz w0, LBB6_4
+; ENABLE-NEXT:  ; %bb.1: ; %if.then
+; ENABLE-NEXT:    sub sp, sp, #16 ; =16
+; ENABLE-NEXT:    add x8, sp, #16 ; =16
+; ENABLE-NEXT:    cmp w1, #1 ; =1
+; ENABLE-NEXT:    str x8, [sp, #8]
+; ENABLE-NEXT:    mov w0, wzr
+; ENABLE-NEXT:    b.lt LBB6_3
+; ENABLE-NEXT:  LBB6_2: ; %for.body
+; ENABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    ldr x8, [sp, #8]
+; ENABLE-NEXT:    add x9, x8, #8 ; =8
+; ENABLE-NEXT:    str x9, [sp, #8]
+; ENABLE-NEXT:    ldr w8, [x8]
+; ENABLE-NEXT:    subs w1, w1, #1 ; =1
+; ENABLE-NEXT:    add w0, w0, w8
+; ENABLE-NEXT:    b.ne LBB6_2
+; ENABLE-NEXT:  LBB6_3: ; %for.end
+; ENABLE-NEXT:    add sp, sp, #16 ; =16
+; ENABLE-NEXT:    ret
+; ENABLE-NEXT:  LBB6_4: ; %if.else
+; ENABLE-NEXT:    lsl w0, w1, #1
+; ENABLE-NEXT:    ret
+;
+; DISABLE-LABEL: variadicFunc:
+; DISABLE:       ; %bb.0: ; %entry
+; DISABLE-NEXT:    sub sp, sp, #16 ; =16
+; DISABLE-NEXT:    cbz w0, LBB6_4
+; DISABLE-NEXT:  ; %bb.1: ; %if.then
+; DISABLE-NEXT:    add x8, sp, #16 ; =16
+; DISABLE-NEXT:    cmp w1, #1 ; =1
+; DISABLE-NEXT:    str x8, [sp, #8]
+; DISABLE-NEXT:    mov w0, wzr
+; DISABLE-NEXT:    b.lt LBB6_3
+; DISABLE-NEXT:  LBB6_2: ; %for.body
+; DISABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    ldr x8, [sp, #8]
+; DISABLE-NEXT:    add x9, x8, #8 ; =8
+; DISABLE-NEXT:    str x9, [sp, #8]
+; DISABLE-NEXT:    ldr w8, [x8]
+; DISABLE-NEXT:    subs w1, w1, #1 ; =1
+; DISABLE-NEXT:    add w0, w0, w8
+; DISABLE-NEXT:    b.ne LBB6_2
+; DISABLE-NEXT:  LBB6_3: ; %if.end
+; DISABLE-NEXT:    add sp, sp, #16 ; =16
+; DISABLE-NEXT:    ret
+; DISABLE-NEXT:  LBB6_4: ; %if.else
+; DISABLE-NEXT:    lsl w0, w1, #1
+; DISABLE-NEXT:    add sp, sp, #16 ; =16
+; DISABLE-NEXT:    ret
 entry:
   %ap = alloca i8*, align 8
   %tobool = icmp eq i32 %cond, 0
@@ -395,35 +502,55 @@ declare void @llvm.va_start(i8*)
 declare void @llvm.va_end(i8*)
 
 ; Check that we handle inline asm correctly.
-; CHECK-LABEL: inlineAsm:
-;
-; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; Make sure we save the CSR used in the inline asm: x19.
-; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x19]], [sp, #-16]!
-;
-; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; CHECK: mov [[IV:w[0-9]+]], #10
-;
-; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
-; Inline asm statement.
-; CHECK: subs [[IV]], [[IV]], #1
-; CHECK: add x19, x19, #1
-; CHECK: b.ne [[LOOP_LABEL]]
-; Next BB.
-; CHECK: mov w0, wzr
-; Epilogue code.
-; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16
-; CHECK-NEXT: ret
-; Next BB.
-; CHECK: [[ELSE_LABEL]]: ; %if.else
-; CHECK-NEXT: lsl w0, w1, #1
-; Epilogue code.
-; DISABLE-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16
-; CHECK-NEXT: ret
 define i32 @inlineAsm(i32 %cond, i32 %N) {
+; ENABLE-LABEL: inlineAsm:
+; ENABLE:       ; %bb.0: ; %entry
+; ENABLE-NEXT:    cbz w0, LBB7_4
+; ENABLE-NEXT:  ; %bb.1: ; %for.body.preheader
+; ENABLE-NEXT:    stp x20, x19, [sp, #-16]! ; 16-byte Folded Spill
+; ENABLE-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-NEXT:    .cfi_offset w19, -8
+; ENABLE-NEXT:    .cfi_offset w20, -16
+; ENABLE-NEXT:    mov w8, #10
+; ENABLE-NEXT:  LBB7_2: ; %for.body
+; ENABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    subs w8, w8, #1 ; =1
+; ENABLE-NEXT:    ; InlineAsm Start
+; ENABLE-NEXT:    add x19, x19, #1 ; =1
+; ENABLE-NEXT:    ; InlineAsm End
+; ENABLE-NEXT:    b.ne LBB7_2
+; ENABLE-NEXT:  ; %bb.3:
+; ENABLE-NEXT:    mov w0, wzr
+; ENABLE-NEXT:    ldp x20, x19, [sp], #16 ; 16-byte Folded Reload
+; ENABLE-NEXT:    ret
+; ENABLE-NEXT:  LBB7_4: ; %if.else
+; ENABLE-NEXT:    lsl w0, w1, #1
+; ENABLE-NEXT:    ret
+;
+; DISABLE-LABEL: inlineAsm:
+; DISABLE:       ; %bb.0: ; %entry
+; DISABLE-NEXT:    stp x20, x19, [sp, #-16]! ; 16-byte Folded Spill
+; DISABLE-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-NEXT:    .cfi_offset w19, -8
+; DISABLE-NEXT:    .cfi_offset w20, -16
+; DISABLE-NEXT:    cbz w0, LBB7_4
+; DISABLE-NEXT:  ; %bb.1: ; %for.body.preheader
+; DISABLE-NEXT:    mov w8, #10
+; DISABLE-NEXT:  LBB7_2: ; %for.body
+; DISABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    subs w8, w8, #1 ; =1
+; DISABLE-NEXT:    ; InlineAsm Start
+; DISABLE-NEXT:    add x19, x19, #1 ; =1
+; DISABLE-NEXT:    ; InlineAsm End
+; DISABLE-NEXT:    b.ne LBB7_2
+; DISABLE-NEXT:  ; %bb.3:
+; DISABLE-NEXT:    mov w0, wzr
+; DISABLE-NEXT:    ldp x20, x19, [sp], #16 ; 16-byte Folded Reload
+; DISABLE-NEXT:    ret
+; DISABLE-NEXT:  LBB7_4: ; %if.else
+; DISABLE-NEXT:    lsl w0, w1, #1
+; DISABLE-NEXT:    ldp x20, x19, [sp], #16 ; 16-byte Folded Reload
+; DISABLE-NEXT:    ret
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %for.body
@@ -445,38 +572,55 @@ if.end:
 }
 
 ; Check that we handle calls to variadic functions correctly.
-; CHECK-LABEL: callVariadicFunc:
-;
-; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; CHECK: sub sp, sp, #64
-; CHECK-NEXT: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #48]
-; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #48
-;
-; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
-; Setup of the varags.
-; CHECK: stp x1, x1, [sp, #32]
-; CHECK-NEXT: stp x1, x1, [sp, #16]
-; CHECK-NEXT: stp x1, x1, [sp]
-; CHECK-NEXT: mov w0, w1
-; CHECK-NEXT: bl _someVariadicFunc
-; CHECK-NEXT: lsl w0, w0, #3
-;
-; DISABLE: b [[IFEND_LABEL:LBB[0-9_]+]]
-; DISABLE: [[ELSE_LABEL]]: ; %if.else
-; DISABLE-NEXT: lsl w0, w1, #1
-; DISABLE: [[IFEND_LABEL]]: ; %if.end
-;
-; Epilogue code.
-; CHECK: ldp [[CSR1]], [[CSR2]], [sp, #48]
-; CHECK-NEXT: add sp, sp, #64
-; CHECK-NEXT: ret
-;
-; ENABLE: [[ELSE_LABEL]]: ; %if.else
-; ENABLE-NEXT: lsl w0, w1, #1
-; ENABLE-NEXT: ret
 define i32 @callVariadicFunc(i32 %cond, i32 %N) {
+; ENABLE-LABEL: callVariadicFunc:
+; ENABLE:       ; %bb.0: ; %entry
+; ENABLE-NEXT:    ; kill: def $w1 killed $w1 def $x1
+; ENABLE-NEXT:    cbz w0, LBB8_2
+; ENABLE-NEXT:  ; %bb.1: ; %if.then
+; ENABLE-NEXT:    sub sp, sp, #64 ; =64
+; ENABLE-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; ENABLE-NEXT:    add x29, sp, #48 ; =48
+; ENABLE-NEXT:    .cfi_def_cfa w29, 16
+; ENABLE-NEXT:    .cfi_offset w30, -8
+; ENABLE-NEXT:    .cfi_offset w29, -16
+; ENABLE-NEXT:    stp x1, x1, [sp, #32]
+; ENABLE-NEXT:    stp x1, x1, [sp, #16]
+; ENABLE-NEXT:    stp x1, x1, [sp]
+; ENABLE-NEXT:    mov w0, w1
+; ENABLE-NEXT:    bl _someVariadicFunc
+; ENABLE-NEXT:    lsl w0, w0, #3
+; ENABLE-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; ENABLE-NEXT:    add sp, sp, #64 ; =64
+; ENABLE-NEXT:    ret
+; ENABLE-NEXT:  LBB8_2: ; %if.else
+; ENABLE-NEXT:    lsl w0, w1, #1
+; ENABLE-NEXT:    ret
+;
+; DISABLE-LABEL: callVariadicFunc:
+; DISABLE:       ; %bb.0: ; %entry
+; DISABLE-NEXT:    sub sp, sp, #64 ; =64
+; DISABLE-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; DISABLE-NEXT:    add x29, sp, #48 ; =48
+; DISABLE-NEXT:    .cfi_def_cfa w29, 16
+; DISABLE-NEXT:    .cfi_offset w30, -8
+; DISABLE-NEXT:    .cfi_offset w29, -16
+; DISABLE-NEXT:    ; kill: def $w1 killed $w1 def $x1
+; DISABLE-NEXT:    cbz w0, LBB8_2
+; DISABLE-NEXT:  ; %bb.1: ; %if.then
+; DISABLE-NEXT:    stp x1, x1, [sp, #32]
+; DISABLE-NEXT:    stp x1, x1, [sp, #16]
+; DISABLE-NEXT:    stp x1, x1, [sp]
+; DISABLE-NEXT:    mov w0, w1
+; DISABLE-NEXT:    bl _someVariadicFunc
+; DISABLE-NEXT:    lsl w0, w0, #3
+; DISABLE-NEXT:    b LBB8_3
+; DISABLE-NEXT:  LBB8_2: ; %if.else
+; DISABLE-NEXT:    lsl w0, w1, #1
+; DISABLE-NEXT:  LBB8_3: ; %if.end
+; DISABLE-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; DISABLE-NEXT:    add sp, sp, #64 ; =64
+; DISABLE-NEXT:    ret
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %if.then
@@ -501,24 +645,35 @@ declare i32 @someVariadicFunc(i32, ...)
 ; Although this is not incorrect to insert such code, it is useless
 ; and it hurts the binary size.
 ;
-; CHECK-LABEL: noreturn:
-; DISABLE: stp
-;
-; CHECK: cbnz w0, [[ABORT:LBB[0-9_]+]]
-;
-; CHECK: mov w0, #42
-;
-; DISABLE-NEXT: ldp
-;
-; CHECK-NEXT: ret
-;
-; CHECK: [[ABORT]]: ; %if.abort
-;
-; ENABLE: stp
-;
-; CHECK: bl _abort
-; ENABLE-NOT: ldp
 define i32 @noreturn(i8 signext %bad_thing) {
+; ENABLE-LABEL: noreturn:
+; ENABLE:       ; %bb.0: ; %entry
+; ENABLE-NEXT:    cbnz w0, LBB9_2
+; ENABLE-NEXT:  ; %bb.1: ; %if.end
+; ENABLE-NEXT:    mov w0, #42
+; ENABLE-NEXT:    ret
+; ENABLE-NEXT:  LBB9_2: ; %if.abort
+; ENABLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; ENABLE-NEXT:    mov x29, sp
+; ENABLE-NEXT:    .cfi_def_cfa w29, 16
+; ENABLE-NEXT:    .cfi_offset w30, -8
+; ENABLE-NEXT:    .cfi_offset w29, -16
+; ENABLE-NEXT:    bl _abort
+;
+; DISABLE-LABEL: noreturn:
+; DISABLE:       ; %bb.0: ; %entry
+; DISABLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; DISABLE-NEXT:    mov x29, sp
+; DISABLE-NEXT:    .cfi_def_cfa w29, 16
+; DISABLE-NEXT:    .cfi_offset w30, -8
+; DISABLE-NEXT:    .cfi_offset w29, -16
+; DISABLE-NEXT:    cbnz w0, LBB9_2
+; DISABLE-NEXT:  ; %bb.1: ; %if.end
+; DISABLE-NEXT:    mov w0, #42
+; DISABLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; DISABLE-NEXT:    ret
+; DISABLE-NEXT:  LBB9_2: ; %if.abort
+; DISABLE-NEXT:    bl _abort
 entry:
   %tobool = icmp eq i8 %bad_thing, 0
   br i1 %tobool, label %if.end, label %if.abort
@@ -543,9 +698,60 @@ attributes #0 = { noreturn nounwind }
 ; should return gracefully and continue compilation.
 ; The only condition for this test is the compilation finishes correctly.
 ;
-; CHECK-LABEL: infiniteloop
-; CHECK: ret
 define void @infiniteloop() {
+; ENABLE-LABEL: infiniteloop:
+; ENABLE:       ; %bb.0: ; %entry
+; ENABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; ENABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; ENABLE-NEXT:    add x29, sp, #16 ; =16
+; ENABLE-NEXT:    .cfi_def_cfa w29, 16
+; ENABLE-NEXT:    .cfi_offset w30, -8
+; ENABLE-NEXT:    .cfi_offset w29, -16
+; ENABLE-NEXT:    .cfi_offset w19, -24
+; ENABLE-NEXT:    .cfi_offset w20, -32
+; ENABLE-NEXT:    cbnz wzr, LBB10_3
+; ENABLE-NEXT:  ; %bb.1: ; %if.then
+; ENABLE-NEXT:    sub x19, sp, #16 ; =16
+; ENABLE-NEXT:    mov sp, x19
+; ENABLE-NEXT:    mov w20, wzr
+; ENABLE-NEXT:  LBB10_2: ; %for.body
+; ENABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    bl _something
+; ENABLE-NEXT:    add w20, w0, w20
+; ENABLE-NEXT:    str w20, [x19]
+; ENABLE-NEXT:    b LBB10_2
+; ENABLE-NEXT:  LBB10_3: ; %if.end
+; ENABLE-NEXT:    sub sp, x29, #16 ; =16
+; ENABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; ENABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; ENABLE-NEXT:    ret
+;
+; DISABLE-LABEL: infiniteloop:
+; DISABLE:       ; %bb.0: ; %entry
+; DISABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; DISABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; DISABLE-NEXT:    add x29, sp, #16 ; =16
+; DISABLE-NEXT:    .cfi_def_cfa w29, 16
+; DISABLE-NEXT:    .cfi_offset w30, -8
+; DISABLE-NEXT:    .cfi_offset w29, -16
+; DISABLE-NEXT:    .cfi_offset w19, -24
+; DISABLE-NEXT:    .cfi_offset w20, -32
+; DISABLE-NEXT:    cbnz wzr, LBB10_3
+; DISABLE-NEXT:  ; %bb.1: ; %if.then
+; DISABLE-NEXT:    sub x19, sp, #16 ; =16
+; DISABLE-NEXT:    mov sp, x19
+; DISABLE-NEXT:    mov w20, wzr
+; DISABLE-NEXT:  LBB10_2: ; %for.body
+; DISABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    bl _something
+; DISABLE-NEXT:    add w20, w0, w20
+; DISABLE-NEXT:    str w20, [x19]
+; DISABLE-NEXT:    b LBB10_2
+; DISABLE-NEXT:  LBB10_3: ; %if.end
+; DISABLE-NEXT:    sub sp, x29, #16 ; =16
+; DISABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; DISABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; DISABLE-NEXT:    ret
 entry:
   br i1 undef, label %if.then, label %if.end
 
@@ -565,9 +771,72 @@ if.end:
 }
 
 ; Another infinite loop test this time with a body bigger than just one block.
-; CHECK-LABEL: infiniteloop2
-; CHECK: ret
 define void @infiniteloop2() {
+; ENABLE-LABEL: infiniteloop2:
+; ENABLE:       ; %bb.0: ; %entry
+; ENABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; ENABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; ENABLE-NEXT:    add x29, sp, #16 ; =16
+; ENABLE-NEXT:    .cfi_def_cfa w29, 16
+; ENABLE-NEXT:    .cfi_offset w30, -8
+; ENABLE-NEXT:    .cfi_offset w29, -16
+; ENABLE-NEXT:    .cfi_offset w19, -24
+; ENABLE-NEXT:    .cfi_offset w20, -32
+; ENABLE-NEXT:    cbnz wzr, LBB11_3
+; ENABLE-NEXT:  ; %bb.1: ; %if.then
+; ENABLE-NEXT:    sub x8, sp, #16 ; =16
+; ENABLE-NEXT:    mov sp, x8
+; ENABLE-NEXT:    mov w9, wzr
+; ENABLE-NEXT:    ; InlineAsm Start
+; ENABLE-NEXT:    mov x10, #0
+; ENABLE-NEXT:    ; InlineAsm End
+; ENABLE-NEXT:  LBB11_2: ; %for.body
+; ENABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    add w9, w10, w9
+; ENABLE-NEXT:    str w9, [x8]
+; ENABLE-NEXT:    ; InlineAsm Start
+; ENABLE-NEXT:    nop
+; ENABLE-NEXT:    ; InlineAsm End
+; ENABLE-NEXT:    mov w9, #1
+; ENABLE-NEXT:    b LBB11_2
+; ENABLE-NEXT:  LBB11_3: ; %if.end
+; ENABLE-NEXT:    sub sp, x29, #16 ; =16
+; ENABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; ENABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; ENABLE-NEXT:    ret
+;
+; DISABLE-LABEL: infiniteloop2:
+; DISABLE:       ; %bb.0: ; %entry
+; DISABLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; DISABLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; DISABLE-NEXT:    add x29, sp, #16 ; =16
+; DISABLE-NEXT:    .cfi_def_cfa w29, 16
+; DISABLE-NEXT:    .cfi_offset w30, -8
+; DISABLE-NEXT:    .cfi_offset w29, -16
+; DISABLE-NEXT:    .cfi_offset w19, -24
+; DISABLE-NEXT:    .cfi_offset w20, -32
+; DISABLE-NEXT:    cbnz wzr, LBB11_3
+; DISABLE-NEXT:  ; %bb.1: ; %if.then
+; DISABLE-NEXT:    sub x8, sp, #16 ; =16
+; DISABLE-NEXT:    mov sp, x8
+; DISABLE-NEXT:    mov w9, wzr
+; DISABLE-NEXT:    ; InlineAsm Start
+; DISABLE-NEXT:    mov x10, #0
+; DISABLE-NEXT:    ; InlineAsm End
+; DISABLE-NEXT:  LBB11_2: ; %for.body
+; DISABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    add w9, w10, w9
+; DISABLE-NEXT:    str w9, [x8]
+; DISABLE-NEXT:    ; InlineAsm Start
+; DISABLE-NEXT:    nop
+; DISABLE-NEXT:    ; InlineAsm End
+; DISABLE-NEXT:    mov w9, #1
+; DISABLE-NEXT:    b LBB11_2
+; DISABLE-NEXT:  LBB11_3: ; %if.end
+; DISABLE-NEXT:    sub sp, x29, #16 ; =16
+; DISABLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; DISABLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; DISABLE-NEXT:    ret
 entry:
   br i1 undef, label %if.then, label %if.end
 
@@ -595,9 +864,54 @@ if.end:
 }
 
 ; Another infinite loop test this time with two nested infinite loop.
-; CHECK-LABEL: infiniteloop3
-; CHECK: ret
 define void @infiniteloop3() {
+; ENABLE-LABEL: infiniteloop3:
+; ENABLE:       ; %bb.0: ; %entry
+; ENABLE-NEXT:    cbnz wzr, LBB12_5
+; ENABLE-NEXT:  ; %bb.1: ; %loop2a.preheader
+; ENABLE-NEXT:    mov x8, xzr
+; ENABLE-NEXT:    mov x9, xzr
+; ENABLE-NEXT:    mov x11, xzr
+; ENABLE-NEXT:    b LBB12_3
+; ENABLE-NEXT:  LBB12_2: ; %loop2b
+; ENABLE-NEXT:    ; in Loop: Header=BB12_3 Depth=1
+; ENABLE-NEXT:    str x10, [x11]
+; ENABLE-NEXT:    mov x11, x10
+; ENABLE-NEXT:  LBB12_3: ; %loop1
+; ENABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    mov x10, x9
+; ENABLE-NEXT:    ldr x9, [x8]
+; ENABLE-NEXT:    cbnz x8, LBB12_2
+; ENABLE-NEXT:  ; %bb.4: ; in Loop: Header=BB12_3 Depth=1
+; ENABLE-NEXT:    mov x8, x10
+; ENABLE-NEXT:    mov x11, x10
+; ENABLE-NEXT:    b LBB12_3
+; ENABLE-NEXT:  LBB12_5: ; %end
+; ENABLE-NEXT:    ret
+;
+; DISABLE-LABEL: infiniteloop3:
+; DISABLE:       ; %bb.0: ; %entry
+; DISABLE-NEXT:    cbnz wzr, LBB12_5
+; DISABLE-NEXT:  ; %bb.1: ; %loop2a.preheader
+; DISABLE-NEXT:    mov x8, xzr
+; DISABLE-NEXT:    mov x9, xzr
+; DISABLE-NEXT:    mov x11, xzr
+; DISABLE-NEXT:    b LBB12_3
+; DISABLE-NEXT:  LBB12_2: ; %loop2b
+; DISABLE-NEXT:    ; in Loop: Header=BB12_3 Depth=1
+; DISABLE-NEXT:    str x10, [x11]
+; DISABLE-NEXT:    mov x11, x10
+; DISABLE-NEXT:  LBB12_3: ; %loop1
+; DISABLE-NEXT:    ; =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    mov x10, x9
+; DISABLE-NEXT:    ldr x9, [x8]
+; DISABLE-NEXT:    cbnz x8, LBB12_2
+; DISABLE-NEXT:  ; %bb.4: ; in Loop: Header=BB12_3 Depth=1
+; DISABLE-NEXT:    mov x8, x10
+; DISABLE-NEXT:    mov x11, x10
+; DISABLE-NEXT:    b LBB12_3
+; DISABLE-NEXT:  LBB12_5: ; %end
+; DISABLE-NEXT:    ret
 entry:
   br i1 undef, label %loop2a, label %body
 
@@ -628,22 +942,50 @@ end:
 
 ; Re-aligned stack pointer.  See bug 26642.  Avoid clobbering live
 ; values in the prologue when re-aligning the stack pointer.
-; CHECK-LABEL: stack_realign:
-; ENABLE-DAG: lsl w[[LSL1:[0-9]+]], w0, w1
-; ENABLE-DAG: lsl w[[LSL2:[0-9]+]], w1, w0
-; DISABLE-NOT: lsl w[[LSL1:[0-9]+]], w0, w1
-; DISABLE-NOT: lsl w[[LSL2:[0-9]+]], w1, w0
-; CHECK: stp x29, x30, [sp, #-16]!
-; CHECK: mov x29, sp
-; ENABLE-NOT: sub x[[LSL1]], sp, #16
-; ENABLE-NOT: sub x[[LSL2]], sp, #16
-; DISABLE: sub x{{[0-9]+}}, sp, #16
-; DISABLE-DAG: lsl w[[LSL1:[0-9]+]], w0, w1
-; DISABLE-DAG: lsl w[[LSL2:[0-9]+]], w1, w0
-; CHECK-DAG: str w[[LSL1]],
-; CHECK-DAG: str w[[LSL2]],
-
 define i32 @stack_realign(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) {
+; ENABLE-LABEL: stack_realign:
+; ENABLE:       ; %bb.0:
+; ENABLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; ENABLE-NEXT:    mov x29, sp
+; ENABLE-NEXT:    sub x9, sp, #16 ; =16
+; ENABLE-NEXT:    and sp, x9, #0xffffffffffffffe0
+; ENABLE-NEXT:    .cfi_def_cfa w29, 16
+; ENABLE-NEXT:    .cfi_offset w30, -8
+; ENABLE-NEXT:    .cfi_offset w29, -16
+; ENABLE-NEXT:    lsl w8, w0, w1
+; ENABLE-NEXT:    cmp w0, w1
+; ENABLE-NEXT:    lsl w9, w1, w0
+; ENABLE-NEXT:    b.ge LBB13_2
+; ENABLE-NEXT:  ; %bb.1: ; %true
+; ENABLE-NEXT:    str w0, [sp]
+; ENABLE-NEXT:  LBB13_2: ; %false
+; ENABLE-NEXT:    str w8, [x2]
+; ENABLE-NEXT:    str w9, [x3]
+; ENABLE-NEXT:    mov sp, x29
+; ENABLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; ENABLE-NEXT:    ret
+;
+; DISABLE-LABEL: stack_realign:
+; DISABLE:       ; %bb.0:
+; DISABLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; DISABLE-NEXT:    mov x29, sp
+; DISABLE-NEXT:    sub x9, sp, #16 ; =16
+; DISABLE-NEXT:    and sp, x9, #0xffffffffffffffe0
+; DISABLE-NEXT:    .cfi_def_cfa w29, 16
+; DISABLE-NEXT:    .cfi_offset w30, -8
+; DISABLE-NEXT:    .cfi_offset w29, -16
+; DISABLE-NEXT:    lsl w8, w0, w1
+; DISABLE-NEXT:    cmp w0, w1
+; DISABLE-NEXT:    lsl w9, w1, w0
+; DISABLE-NEXT:    b.ge LBB13_2
+; DISABLE-NEXT:  ; %bb.1: ; %true
+; DISABLE-NEXT:    str w0, [sp]
+; DISABLE-NEXT:  LBB13_2: ; %false
+; DISABLE-NEXT:    str w8, [x2]
+; DISABLE-NEXT:    str w9, [x3]
+; DISABLE-NEXT:    mov sp, x29
+; DISABLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; DISABLE-NEXT:    ret
   %tmp = alloca i32, align 32
   %shl1 = shl i32 %a, %b
   %shl2 = shl i32 %b, %a
@@ -667,12 +1009,126 @@ false:
 ; ensuring we have a scratch register to re-align the stack pointer is
 ; too complicated.  Output should be the same for both enabled and
 ; disabled shrink wrapping.
-; CHECK-LABEL: stack_realign2:
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #-{{[0-9]+}}]!
-; CHECK: add x29, sp, #{{[0-9]+}}
-; CHECK: lsl {{w[0-9]+}}, w0, w1
-
 define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3, i32* %ptr4, i32* %ptr5, i32* %ptr6) {
+; ENABLE-LABEL: stack_realign2:
+; ENABLE:       ; %bb.0:
+; ENABLE-NEXT:    stp x28, x27, [sp, #-96]! ; 16-byte Folded Spill
+; ENABLE-NEXT:    stp x26, x25, [sp, #16] ; 16-byte Folded Spill
+; ENABLE-NEXT:    stp x24, x23, [sp, #32] ; 16-byte Folded Spill
+; ENABLE-NEXT:    stp x22, x21, [sp, #48] ; 16-byte Folded Spill
+; ENABLE-NEXT:    stp x20, x19, [sp, #64] ; 16-byte Folded Spill
+; ENABLE-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; ENABLE-NEXT:    add x29, sp, #80 ; =80
+; ENABLE-NEXT:    sub x9, sp, #32 ; =32
+; ENABLE-NEXT:    and sp, x9, #0xffffffffffffffe0
+; ENABLE-NEXT:    .cfi_def_cfa w29, 16
+; ENABLE-NEXT:    .cfi_offset w30, -8
+; ENABLE-NEXT:    .cfi_offset w29, -16
+; ENABLE-NEXT:    .cfi_offset w19, -24
+; ENABLE-NEXT:    .cfi_offset w20, -32
+; ENABLE-NEXT:    .cfi_offset w21, -40
+; ENABLE-NEXT:    .cfi_offset w22, -48
+; ENABLE-NEXT:    .cfi_offset w23, -56
+; ENABLE-NEXT:    .cfi_offset w24, -64
+; ENABLE-NEXT:    .cfi_offset w25, -72
+; ENABLE-NEXT:    .cfi_offset w26, -80
+; ENABLE-NEXT:    .cfi_offset w27, -88
+; ENABLE-NEXT:    .cfi_offset w28, -96
+; ENABLE-NEXT:    lsl w8, w0, w1
+; ENABLE-NEXT:    lsl w9, w1, w0
+; ENABLE-NEXT:    lsr w10, w0, w1
+; ENABLE-NEXT:    lsr w11, w1, w0
+; ENABLE-NEXT:    add w12, w1, w0
+; ENABLE-NEXT:    sub w13, w1, w0
+; ENABLE-NEXT:    cmp w0, w1
+; ENABLE-NEXT:    add w17, w8, w9
+; ENABLE-NEXT:    sub w16, w9, w10
+; ENABLE-NEXT:    add w15, w10, w11
+; ENABLE-NEXT:    add w14, w11, w12
+; ENABLE-NEXT:    b.ge LBB14_2
+; ENABLE-NEXT:  ; %bb.1: ; %true
+; ENABLE-NEXT:    str w0, [sp]
+; ENABLE-NEXT:    ; InlineAsm Start
+; ENABLE-NEXT:    nop
+; ENABLE-NEXT:    ; InlineAsm End
+; ENABLE-NEXT:  LBB14_2: ; %false
+; ENABLE-NEXT:    str w8, [x2]
+; ENABLE-NEXT:    str w9, [x3]
+; ENABLE-NEXT:    str w10, [x4]
+; ENABLE-NEXT:    str w11, [x5]
+; ENABLE-NEXT:    str w12, [x6]
+; ENABLE-NEXT:    str w13, [x7]
+; ENABLE-NEXT:    stp w0, w1, [x2, #4]
+; ENABLE-NEXT:    stp w17, w16, [x2, #12]
+; ENABLE-NEXT:    stp w15, w14, [x2, #20]
+; ENABLE-NEXT:    sub sp, x29, #80 ; =80
+; ENABLE-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; ENABLE-NEXT:    ldp x20, x19, [sp, #64] ; 16-byte Folded Reload
+; ENABLE-NEXT:    ldp x22, x21, [sp, #48] ; 16-byte Folded Reload
+; ENABLE-NEXT:    ldp x24, x23, [sp, #32] ; 16-byte Folded Reload
+; ENABLE-NEXT:    ldp x26, x25, [sp, #16] ; 16-byte Folded Reload
+; ENABLE-NEXT:    ldp x28, x27, [sp], #96 ; 16-byte Folded Reload
+; ENABLE-NEXT:    ret
+;
+; DISABLE-LABEL: stack_realign2:
+; DISABLE:       ; %bb.0:
+; DISABLE-NEXT:    stp x28, x27, [sp, #-96]! ; 16-byte Folded Spill
+; DISABLE-NEXT:    stp x26, x25, [sp, #16] ; 16-byte Folded Spill
+; DISABLE-NEXT:    stp x24, x23, [sp, #32] ; 16-byte Folded Spill
+; DISABLE-NEXT:    stp x22, x21, [sp, #48] ; 16-byte Folded Spill
+; DISABLE-NEXT:    stp x20, x19, [sp, #64] ; 16-byte Folded Spill
+; DISABLE-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; DISABLE-NEXT:    add x29, sp, #80 ; =80
+; DISABLE-NEXT:    sub x9, sp, #32 ; =32
+; DISABLE-NEXT:    and sp, x9, #0xffffffffffffffe0
+; DISABLE-NEXT:    .cfi_def_cfa w29, 16
+; DISABLE-NEXT:    .cfi_offset w30, -8
+; DISABLE-NEXT:    .cfi_offset w29, -16
+; DISABLE-NEXT:    .cfi_offset w19, -24
+; DISABLE-NEXT:    .cfi_offset w20, -32
+; DISABLE-NEXT:    .cfi_offset w21, -40
+; DISABLE-NEXT:    .cfi_offset w22, -48
+; DISABLE-NEXT:    .cfi_offset w23, -56
+; DISABLE-NEXT:    .cfi_offset w24, -64
+; DISABLE-NEXT:    .cfi_offset w25, -72
+; DISABLE-NEXT:    .cfi_offset w26, -80
+; DISABLE-NEXT:    .cfi_offset w27, -88
+; DISABLE-NEXT:    .cfi_offset w28, -96
+; DISABLE-NEXT:    lsl w8, w0, w1
+; DISABLE-NEXT:    lsl w9, w1, w0
+; DISABLE-NEXT:    lsr w10, w0, w1
+; DISABLE-NEXT:    lsr w11, w1, w0
+; DISABLE-NEXT:    add w12, w1, w0
+; DISABLE-NEXT:    sub w13, w1, w0
+; DISABLE-NEXT:    cmp w0, w1
+; DISABLE-NEXT:    add w17, w8, w9
+; DISABLE-NEXT:    sub w16, w9, w10
+; DISABLE-NEXT:    add w15, w10, w11
+; DISABLE-NEXT:    add w14, w11, w12
+; DISABLE-NEXT:    b.ge LBB14_2
+; DISABLE-NEXT:  ; %bb.1: ; %true
+; DISABLE-NEXT:    str w0, [sp]
+; DISABLE-NEXT:    ; InlineAsm Start
+; DISABLE-NEXT:    nop
+; DISABLE-NEXT:    ; InlineAsm End
+; DISABLE-NEXT:  LBB14_2: ; %false
+; DISABLE-NEXT:    str w8, [x2]
+; DISABLE-NEXT:    str w9, [x3]
+; DISABLE-NEXT:    str w10, [x4]
+; DISABLE-NEXT:    str w11, [x5]
+; DISABLE-NEXT:    str w12, [x6]
+; DISABLE-NEXT:    str w13, [x7]
+; DISABLE-NEXT:    stp w0, w1, [x2, #4]
+; DISABLE-NEXT:    stp w17, w16, [x2, #12]
+; DISABLE-NEXT:    stp w15, w14, [x2, #20]
+; DISABLE-NEXT:    sub sp, x29, #80 ; =80
+; DISABLE-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; DISABLE-NEXT:    ldp x20, x19, [sp, #64] ; 16-byte Folded Reload
+; DISABLE-NEXT:    ldp x22, x21, [sp, #48] ; 16-byte Folded Reload
+; DISABLE-NEXT:    ldp x24, x23, [sp, #32] ; 16-byte Folded Reload
+; DISABLE-NEXT:    ldp x26, x25, [sp, #16] ; 16-byte Folded Reload
+; DISABLE-NEXT:    ldp x28, x27, [sp], #96 ; 16-byte Folded Reload
+; DISABLE-NEXT:    ret
   %tmp = alloca i32, align 32
   %tmp1 = shl i32 %a, %b
   %tmp2 = shl i32 %b, %a

Modified: llvm/trunk/test/CodeGen/AArch64/branch-relax-cbz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/branch-relax-cbz.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/branch-relax-cbz.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/branch-relax-cbz.ll Thu Jun 13 06:56:19 2019
@@ -1,28 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-apple-darwin -aarch64-cbz-offset-bits=3 < %s | FileCheck %s
 
-; CHECK-LABEL: _split_block_no_fallthrough:
-; CHECK: cmn x{{[0-9]+}}, #5
-; CHECK-NEXT: b.le [[B2:LBB[0-9]+_[0-9]+]]
-
-; CHECK-NEXT: ; %bb.1: ; %b3
-; CHECK: ldr [[LOAD:w[0-9]+]]
-; CHECK: cbnz [[LOAD]], [[B8:LBB[0-9]+_[0-9]+]]
-; CHECK-NEXT: b [[B7:LBB[0-9]+_[0-9]+]]
-
-; CHECK-NEXT: [[B8]]: ; %b8
-; CHECK-NEXT: ret
-
-; CHECK-NEXT: [[B2]]: ; %b2
-; CHECK: mov w{{[0-9]+}}, #93
-; CHECK: bl _extfunc
-; CHECK: cbz w{{[0-9]+}}, [[B7]]
-; CHECK-NEXT: b [[B8]]
-
-; CHECK-NEXT: [[B7]]: ; %b7
-; CHECK: mov w{{[0-9]+}}, #13
-; CHECK: b _extfunc
 
 define void @split_block_no_fallthrough(i64 %val) #0 {
+; CHECK-LABEL: split_block_no_fallthrough:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-NEXT:    cmn x0, #5 ; =5
+; CHECK-NEXT:    b.le LBB0_3
+; CHECK-NEXT:  ; %bb.1: ; %b3
+; CHECK-NEXT:    ldr w8, [x8]
+; CHECK-NEXT:    cbnz w8, LBB0_2
+; CHECK-NEXT:    b LBB0_4
+; CHECK-NEXT:  LBB0_2: ; %b8
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB0_3: ; %b2
+; CHECK-NEXT:    mov w0, #93
+; CHECK-NEXT:    bl _extfunc
+; CHECK-NEXT:    cbnz w0, LBB0_2
+; CHECK-NEXT:  LBB0_4: ; %b7
+; CHECK-NEXT:    mov w0, #13
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-NEXT:    b _extfunc
 bb:
   %c0 = icmp sgt i64 %val, -5
   br i1 %c0, label %b3, label %b2

Added: llvm/trunk/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir?rev=363265&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir (added)
+++ llvm/trunk/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir Thu Jun 13 06:56:19 2019
@@ -0,0 +1,229 @@
+--- |
+ ; pr37472
+ ; These tests verify that shrink-wrap does not set the restore point
+ ; to a position where the stack might still be accessed by a load or store
+ 
+ ; RUN: llc -x=mir -simplify-mir -run-pass=shrink-wrap -o - %s | FileCheck %s
+ ; CHECK:      name:            compiler_pop_stack
+ ; CHECK:      frameInfo:       
+ ; CHECK-NOT:  savePoint:
+ ; CHECK-NOT:  restorePoint:
+ ; CHECK:      stack:
+ ; CHECK:      name:            f
+ ; CHECK:      frameInfo:       
+ ; CHECK:      savePoint:       '%bb.2'
+ ; CHECK-NEXT: restorePoint:    '%bb.4'
+ ; CHECK-NEXT: stack:
+
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64-arm-none-eabi"
+
+  %struct.S = type { i32, i32 }
+
+  @__const.f.arr = private unnamed_addr constant [4 x i8] c"\01\02\03\04", align 1
+
+
+  declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+
+  declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
+
+  declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg)
+ ; Test from: https://bugs.llvm.org/show_bug.cgi?id=42136
+  define void @compiler_pop_stack(i32 %num) {
+  entry:
+    %rstack = alloca [16 x i32], align 4
+    %tmp = bitcast [16 x i32]* %rstack to i8*
+    call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %tmp)
+    %cmp = icmp ult i32 %num, 2
+    br i1 %cmp, label %cleanup, label %if.end
+
+  if.end:
+    %arrayidx1 = bitcast [16 x i32]* %rstack to i32*
+    store volatile i32 %num, i32* %arrayidx1, align 4
+    br label %while.body
+
+  while.body:
+    %ptr.017 = phi i32 [ 1, %if.end ], [ %ptr.1, %if.end7 ]
+    %dec = add i32 %ptr.017, -1
+    %idxprom = zext i32 %dec to i64
+    %arrayidx2 = getelementptr inbounds [16 x i32], [16 x i32]* %rstack, i64 0, i64 %idxprom
+    %tmp1 = load volatile i32, i32* %arrayidx2, align 4
+    %cmp3 = icmp eq i32 %tmp1, 0
+    br i1 %cmp3, label %if.end7, label %if.then4
+
+  if.then4:
+    %sunkaddr = mul i64 %idxprom, 4
+    %0 = bitcast [16 x i32]* %rstack to i8*
+    %sunkaddr2 = getelementptr inbounds i8, i8* %0, i64 %sunkaddr
+    %1 = bitcast i8* %sunkaddr2 to i32*
+    store volatile i32 %tmp1, i32* %1, align 4
+    br label %if.end7
+
+  if.end7:
+    %ptr.1 = phi i32 [ %ptr.017, %if.then4 ], [ %dec, %while.body ]
+    %cmp1 = icmp eq i32 %ptr.1, 0
+    br i1 %cmp1, label %cleanup, label %while.body
+
+  cleanup:
+    %2 = bitcast [16 x i32]* %rstack to i8*
+    call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %2)
+    ret void
+  }
+ ; Test from: https://bugs.llvm.org/show_bug.cgi?id=37472
+  define i32 @f(%struct.S* nocapture %arg, i32 %arg1) {
+  bb:
+    %tmp = alloca [4 x i8], align 1
+    %tmp2 = icmp ugt i32 %arg1, 4
+    br i1 %tmp2, label %bb16, label %bb3
+
+  bb3:
+    %tmp41 = bitcast [4 x i8]* %tmp to i8*
+    call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp41)
+    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %tmp41, i8* align 1 getelementptr inbounds ([4 x i8], [4 x i8]* @__const.f.arr, i64 0, i64 0), i64 4, i1 true)
+    %tmp5 = zext i32 %arg1 to i64
+    %tmp6 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i64 0, i64 %tmp5
+    %tmp7 = load volatile i8, i8* %tmp6, align 1
+    %tmp8 = zext i8 %tmp7 to i32
+    %tmp92 = bitcast %struct.S* %arg to i32*
+    store i32 %tmp8, i32* %tmp92, align 4
+    %tmp10 = icmp ult i32 %arg1, 3
+    br i1 %tmp10, label %bb11, label %bb15
+
+  bb11:
+    %0 = bitcast [4 x i8]* %tmp to i8*
+    %sunkaddr = getelementptr inbounds i8, i8* %0, i64 %tmp5
+    %tmp12 = load volatile i8, i8* %sunkaddr, align 1
+    %tmp13 = zext i8 %tmp12 to i32
+    %tmp14 = getelementptr inbounds %struct.S, %struct.S* %arg, i64 0, i32 1
+    store i32 %tmp13, i32* %tmp14, align 4
+    br label %bb15
+
+  bb15:
+    %1 = bitcast [4 x i8]* %tmp to i8*
+    call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1)
+    br label %bb16
+
+  bb16:
+    %tmp17 = phi i32 [ 0, %bb15 ], [ 1, %bb ]
+    ret i32 %tmp17
+  }
+
+
+  declare void @llvm.stackprotector(i8*, i8**)
+
+...
+---
+name:            compiler_pop_stack
+alignment:       2
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+frameInfo:
+  maxAlignment:    4
+  maxCallFrameSize: 0
+  localFrameSize:  64
+stack:
+  - { id: 0, name: rstack, size: 64, alignment: 4, stack-id: 0, local-offset: -64 }
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    liveins: $w0
+
+    dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv
+    Bcc 3, %bb.6, implicit killed $nzcv
+    B %bb.1
+
+  bb.1.if.end:
+    liveins: $w0
+
+    STRWui killed renamable $w0, %stack.0.rstack, 0 :: (volatile store 4 into %ir.arrayidx1)
+    renamable $w9 = MOVi32imm 1
+    renamable $x8 = ADDXri %stack.0.rstack, 0, 0
+
+  bb.2.while.body:
+    successors: %bb.3(0x30000000), %bb.4(0x50000000)
+    liveins: $w9, $x8
+
+    renamable $w10 = SUBWri renamable $w9, 1, 0, implicit-def $x10
+    renamable $w11 = LDRWroW renamable $x8, renamable $w10, 0, 1 :: (volatile load 4 from %ir.arrayidx2)
+    CBNZW renamable $w11, %bb.4
+
+  bb.3:
+    liveins: $x8, $x10
+
+    renamable $w9 = COPY renamable $w10, implicit killed $x10
+    B %bb.5
+
+  bb.4.if.then4:
+    liveins: $w9, $w11, $x8, $x10
+
+    STRWroX killed renamable $w11, renamable $x8, killed renamable $x10, 0, 1 :: (volatile store 4 into %ir.1)
+
+  bb.5.if.end7:
+    successors: %bb.6(0x04000000), %bb.2(0x7c000000)
+    liveins: $w9, $x8
+
+    CBNZW renamable $w9, %bb.2
+    B %bb.6
+
+  bb.6.cleanup:
+    RET_ReallyLR
+
+...
+---
+name:            f
+alignment:       2
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$w1' }
+frameInfo:
+  maxAlignment:    4
+  maxCallFrameSize: 0
+  localFrameSize:  4
+stack:
+  - { id: 0, name: tmp, size: 4, alignment: 4, stack-id: 0, local-offset: -4 }
+machineFunctionInfo: {}
+body:             |
+  bb.0.bb:
+    successors: %bb.1, %bb.2
+    liveins: $w1, $x0
+
+    dead $wzr = SUBSWri renamable $w1, 4, 0, implicit-def $nzcv
+    Bcc 9, %bb.2, implicit killed $nzcv
+
+  bb.1:
+    renamable $w0 = MOVi32imm 1
+    B %bb.5
+
+  bb.2.bb3:
+    successors: %bb.3, %bb.4
+    liveins: $w1, $x0
+
+    renamable $w9 = MOVi32imm 67305985
+    renamable $w8 = ORRWrs $wzr, renamable $w1, 0, implicit-def $x8
+    STRWui killed renamable $w9, %stack.0.tmp, 0 :: (volatile store 4 into %ir.tmp41)
+    renamable $x9 = ADDXri %stack.0.tmp, 0, 0
+    renamable $w10 = LDRBBroX renamable $x9, renamable $x8, 0, 0 :: (volatile load 1 from %ir.tmp6)
+    dead $wzr = SUBSWri killed renamable $w1, 2, 0, implicit-def $nzcv
+    STRWui killed renamable $w10, renamable $x0, 0 :: (store 4 into %ir.tmp92)
+    Bcc 8, %bb.4, implicit killed $nzcv
+    B %bb.3
+
+  bb.3.bb11:
+    liveins: $x0, $x8, $x9
+
+    renamable $w8 = LDRBBroX killed renamable $x9, killed renamable $x8, 0, 0 :: (volatile load 1 from %ir.sunkaddr)
+    STRWui killed renamable $w8, killed renamable $x0, 1 :: (store 4 into %ir.tmp14)
+
+  bb.4.bb15:
+    renamable $w0 = COPY $wzr
+
+  bb.5.bb16:
+    liveins: $w0
+
+    RET_ReallyLR implicit $w0
+
+...

Modified: llvm/trunk/test/CodeGen/AArch64/taildup-cfi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/taildup-cfi.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/taildup-cfi.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/taildup-cfi.ll Thu Jun 13 06:56:19 2019
@@ -32,7 +32,7 @@ if.then:
   store i32 0, i32* @f, align 4, !tbaa !2
   br label %if.end
 
-; DARWIN-NOT:       Merging into block
+; DARWIN:           Merging into block
 ; LINUX:    	      Merging into block
 
 if.end:                                           ; preds = %entry.if.end_crit_edge, %if.then

Modified: llvm/trunk/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll Thu Jun 13 06:56:19 2019
@@ -1,5 +1,6 @@
-; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
-; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=ENABLE
+; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=DISABLE
 ; We cannot merge this test with the main test for shrink-wrapping, because
 ; the code path we want to exerce is not taken with ios lowering.
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64"
@@ -12,53 +13,185 @@ target triple = "armv7--linux-gnueabi"
 ; The exit block of the loop happens to also lead to defs/uses of CSRs.
 ; It also post-dominates the loop body and we use to generate invalid
 ; restore sequence. I.e., we restored too early.
-;
-; CHECK-LABEL: wrongUseOfPostDominate:
-;
-; The prologue is the first thing happening in the function
-; without shrink-wrapping.
-; DISABLE: push
-;
-; CHECK: cmn r1, #1
-;
-; With shrink-wrapping, we branch to a pre-header, where the prologue
-; is located.
-; ENABLE-NEXT: ble [[LOOP_PREHEADER:[.a-zA-Z0-9_]+]]
-; Without shrink-wrapping, we go straight into the loop.
-; DISABLE-NEXT: ble [[LOOP_HEADER:[.a-zA-Z0-9_]+]]
-;
-; CHECK: @ %if.end29
-; DISABLE-NEXT: pop
-; ENABLE-NEXT: bx lr
-;
-; ENABLE: [[LOOP_PREHEADER]]
-; ENABLE: push
-; We must not find a pop here, otherwise that means we are in the loop
-; and are restoring before using the saved CSRs.
-; ENABLE-NOT: pop
-; ENALBE-NEXT: [[LOOP_HEADER:[.a-zA-Z0-9_]+]]: @ %while.cond2.outer
-;
-; DISABLE: [[LOOP_HEADER]]: @ %while.cond2.outer
-;
-; ENABLE-NOT: pop
-;
-; CHECK: @ %while.cond2
-; CHECK: add
-; CHECK-NEXT: cmp r{{[0-1]+}}, #1
-; Jump to the return block
-; CHECK-NEXT: beq [[RETURN_BLOCK:[.a-zA-Z0-9_]+]]
-;
-; Use the back edge to check we get the label of the loop right.
-; This is to make sure we check the right loop pattern.
-; CHECK:  @ %while.body24.land.rhs14_crit_edge
-; CHECK: cmp r{{[0-9]+}}, #192
-; CHECK-NEXT bhs [[LOOP_HEADER]]
-;
-; CHECK: [[RETURN_BLOCK]]:
-; Set the return value.
-; CHECK-NEXT: mov r0,
-; CHECK-NEXT: pop
+
 define fastcc i8* @wrongUseOfPostDominate(i8* readonly %s, i32 %off, i8* readnone %lim) {
+; ENABLE-LABEL: wrongUseOfPostDominate:
+; ENABLE:       @ %bb.0: @ %entry
+; ENABLE-NEXT:    .save {r11, lr}
+; ENABLE-NEXT:    push {r11, lr}
+; ENABLE-NEXT:    cmn r1, #1
+; ENABLE-NEXT:    ble .LBB0_6
+; ENABLE-NEXT:  @ %bb.1: @ %while.cond.preheader
+; ENABLE-NEXT:    cmp r1, #0
+; ENABLE-NEXT:    beq .LBB0_5
+; ENABLE-NEXT:  @ %bb.2: @ %while.cond.preheader
+; ENABLE-NEXT:    cmp r0, r2
+; ENABLE-NEXT:    pophs {r11, pc}
+; ENABLE-NEXT:    movw r12, :lower16:skip
+; ENABLE-NEXT:    sub r1, r1, #1
+; ENABLE-NEXT:    movt r12, :upper16:skip
+; ENABLE-NEXT:  .LBB0_3: @ %while.body
+; ENABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    ldrb r3, [r0]
+; ENABLE-NEXT:    ldrb r3, [r12, r3]
+; ENABLE-NEXT:    add r0, r0, r3
+; ENABLE-NEXT:    sub r3, r1, #1
+; ENABLE-NEXT:    cmp r3, r1
+; ENABLE-NEXT:    bhs .LBB0_5
+; ENABLE-NEXT:  @ %bb.4: @ %while.body
+; ENABLE-NEXT:    @ in Loop: Header=BB0_3 Depth=1
+; ENABLE-NEXT:    cmp r0, r2
+; ENABLE-NEXT:    mov r1, r3
+; ENABLE-NEXT:    blo .LBB0_3
+; ENABLE-NEXT:  .LBB0_5: @ %if.end29
+; ENABLE-NEXT:    pop {r11, pc}
+; ENABLE-NEXT:  .LBB0_6: @ %while.cond2.outer
+; ENABLE-NEXT:    @ =>This Loop Header: Depth=1
+; ENABLE-NEXT:    @ Child Loop BB0_7 Depth 2
+; ENABLE-NEXT:    @ Child Loop BB0_14 Depth 2
+; ENABLE-NEXT:    mov r3, r0
+; ENABLE-NEXT:  .LBB0_7: @ %while.cond2
+; ENABLE-NEXT:    @ Parent Loop BB0_6 Depth=1
+; ENABLE-NEXT:    @ => This Inner Loop Header: Depth=2
+; ENABLE-NEXT:    add r1, r1, #1
+; ENABLE-NEXT:    cmp r1, #1
+; ENABLE-NEXT:    beq .LBB0_17
+; ENABLE-NEXT:  @ %bb.8: @ %while.body4
+; ENABLE-NEXT:    @ in Loop: Header=BB0_7 Depth=2
+; ENABLE-NEXT:    cmp r3, r2
+; ENABLE-NEXT:    bls .LBB0_7
+; ENABLE-NEXT:  @ %bb.9: @ %if.then7
+; ENABLE-NEXT:    @ in Loop: Header=BB0_6 Depth=1
+; ENABLE-NEXT:    mov r0, r3
+; ENABLE-NEXT:    ldrb r12, [r0, #-1]!
+; ENABLE-NEXT:    sxtb lr, r12
+; ENABLE-NEXT:    cmn lr, #1
+; ENABLE-NEXT:    bgt .LBB0_6
+; ENABLE-NEXT:  @ %bb.10: @ %if.then7
+; ENABLE-NEXT:    @ in Loop: Header=BB0_6 Depth=1
+; ENABLE-NEXT:    cmp r0, r2
+; ENABLE-NEXT:    bls .LBB0_6
+; ENABLE-NEXT:  @ %bb.11: @ %land.rhs14.preheader
+; ENABLE-NEXT:    @ in Loop: Header=BB0_6 Depth=1
+; ENABLE-NEXT:    cmn lr, #1
+; ENABLE-NEXT:    bgt .LBB0_6
+; ENABLE-NEXT:  @ %bb.12: @ %land.rhs14.preheader
+; ENABLE-NEXT:    @ in Loop: Header=BB0_6 Depth=1
+; ENABLE-NEXT:    cmp r12, #191
+; ENABLE-NEXT:    bhi .LBB0_6
+; ENABLE-NEXT:  @ %bb.13: @ %while.body24.preheader
+; ENABLE-NEXT:    @ in Loop: Header=BB0_6 Depth=1
+; ENABLE-NEXT:    sub r3, r3, #2
+; ENABLE-NEXT:  .LBB0_14: @ %while.body24
+; ENABLE-NEXT:    @ Parent Loop BB0_6 Depth=1
+; ENABLE-NEXT:    @ => This Inner Loop Header: Depth=2
+; ENABLE-NEXT:    mov r0, r3
+; ENABLE-NEXT:    cmp r3, r2
+; ENABLE-NEXT:    bls .LBB0_6
+; ENABLE-NEXT:  @ %bb.15: @ %while.body24.land.rhs14_crit_edge
+; ENABLE-NEXT:    @ in Loop: Header=BB0_14 Depth=2
+; ENABLE-NEXT:    mov r3, r0
+; ENABLE-NEXT:    ldrsb lr, [r3], #-1
+; ENABLE-NEXT:    cmn lr, #1
+; ENABLE-NEXT:    uxtb r12, lr
+; ENABLE-NEXT:    bgt .LBB0_6
+; ENABLE-NEXT:  @ %bb.16: @ %while.body24.land.rhs14_crit_edge
+; ENABLE-NEXT:    @ in Loop: Header=BB0_14 Depth=2
+; ENABLE-NEXT:    cmp r12, #192
+; ENABLE-NEXT:    blo .LBB0_14
+; ENABLE-NEXT:    b .LBB0_6
+; ENABLE-NEXT:  .LBB0_17:
+; ENABLE-NEXT:    mov r0, r3
+; ENABLE-NEXT:    pop {r11, pc}
+;
+; DISABLE-LABEL: wrongUseOfPostDominate:
+; DISABLE:       @ %bb.0: @ %entry
+; DISABLE-NEXT:    .save {r11, lr}
+; DISABLE-NEXT:    push {r11, lr}
+; DISABLE-NEXT:    cmn r1, #1
+; DISABLE-NEXT:    ble .LBB0_6
+; DISABLE-NEXT:  @ %bb.1: @ %while.cond.preheader
+; DISABLE-NEXT:    cmp r1, #0
+; DISABLE-NEXT:    beq .LBB0_5
+; DISABLE-NEXT:  @ %bb.2: @ %while.cond.preheader
+; DISABLE-NEXT:    cmp r0, r2
+; DISABLE-NEXT:    pophs {r11, pc}
+; DISABLE-NEXT:    movw r12, :lower16:skip
+; DISABLE-NEXT:    sub r1, r1, #1
+; DISABLE-NEXT:    movt r12, :upper16:skip
+; DISABLE-NEXT:  .LBB0_3: @ %while.body
+; DISABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    ldrb r3, [r0]
+; DISABLE-NEXT:    ldrb r3, [r12, r3]
+; DISABLE-NEXT:    add r0, r0, r3
+; DISABLE-NEXT:    sub r3, r1, #1
+; DISABLE-NEXT:    cmp r3, r1
+; DISABLE-NEXT:    bhs .LBB0_5
+; DISABLE-NEXT:  @ %bb.4: @ %while.body
+; DISABLE-NEXT:    @ in Loop: Header=BB0_3 Depth=1
+; DISABLE-NEXT:    cmp r0, r2
+; DISABLE-NEXT:    mov r1, r3
+; DISABLE-NEXT:    blo .LBB0_3
+; DISABLE-NEXT:  .LBB0_5: @ %if.end29
+; DISABLE-NEXT:    pop {r11, pc}
+; DISABLE-NEXT:  .LBB0_6: @ %while.cond2.outer
+; DISABLE-NEXT:    @ =>This Loop Header: Depth=1
+; DISABLE-NEXT:    @ Child Loop BB0_7 Depth 2
+; DISABLE-NEXT:    @ Child Loop BB0_14 Depth 2
+; DISABLE-NEXT:    mov r3, r0
+; DISABLE-NEXT:  .LBB0_7: @ %while.cond2
+; DISABLE-NEXT:    @ Parent Loop BB0_6 Depth=1
+; DISABLE-NEXT:    @ => This Inner Loop Header: Depth=2
+; DISABLE-NEXT:    add r1, r1, #1
+; DISABLE-NEXT:    cmp r1, #1
+; DISABLE-NEXT:    beq .LBB0_17
+; DISABLE-NEXT:  @ %bb.8: @ %while.body4
+; DISABLE-NEXT:    @ in Loop: Header=BB0_7 Depth=2
+; DISABLE-NEXT:    cmp r3, r2
+; DISABLE-NEXT:    bls .LBB0_7
+; DISABLE-NEXT:  @ %bb.9: @ %if.then7
+; DISABLE-NEXT:    @ in Loop: Header=BB0_6 Depth=1
+; DISABLE-NEXT:    mov r0, r3
+; DISABLE-NEXT:    ldrb r12, [r0, #-1]!
+; DISABLE-NEXT:    sxtb lr, r12
+; DISABLE-NEXT:    cmn lr, #1
+; DISABLE-NEXT:    bgt .LBB0_6
+; DISABLE-NEXT:  @ %bb.10: @ %if.then7
+; DISABLE-NEXT:    @ in Loop: Header=BB0_6 Depth=1
+; DISABLE-NEXT:    cmp r0, r2
+; DISABLE-NEXT:    bls .LBB0_6
+; DISABLE-NEXT:  @ %bb.11: @ %land.rhs14.preheader
+; DISABLE-NEXT:    @ in Loop: Header=BB0_6 Depth=1
+; DISABLE-NEXT:    cmn lr, #1
+; DISABLE-NEXT:    bgt .LBB0_6
+; DISABLE-NEXT:  @ %bb.12: @ %land.rhs14.preheader
+; DISABLE-NEXT:    @ in Loop: Header=BB0_6 Depth=1
+; DISABLE-NEXT:    cmp r12, #191
+; DISABLE-NEXT:    bhi .LBB0_6
+; DISABLE-NEXT:  @ %bb.13: @ %while.body24.preheader
+; DISABLE-NEXT:    @ in Loop: Header=BB0_6 Depth=1
+; DISABLE-NEXT:    sub r3, r3, #2
+; DISABLE-NEXT:  .LBB0_14: @ %while.body24
+; DISABLE-NEXT:    @ Parent Loop BB0_6 Depth=1
+; DISABLE-NEXT:    @ => This Inner Loop Header: Depth=2
+; DISABLE-NEXT:    mov r0, r3
+; DISABLE-NEXT:    cmp r3, r2
+; DISABLE-NEXT:    bls .LBB0_6
+; DISABLE-NEXT:  @ %bb.15: @ %while.body24.land.rhs14_crit_edge
+; DISABLE-NEXT:    @ in Loop: Header=BB0_14 Depth=2
+; DISABLE-NEXT:    mov r3, r0
+; DISABLE-NEXT:    ldrsb lr, [r3], #-1
+; DISABLE-NEXT:    cmn lr, #1
+; DISABLE-NEXT:    uxtb r12, lr
+; DISABLE-NEXT:    bgt .LBB0_6
+; DISABLE-NEXT:  @ %bb.16: @ %while.body24.land.rhs14_crit_edge
+; DISABLE-NEXT:    @ in Loop: Header=BB0_14 Depth=2
+; DISABLE-NEXT:    cmp r12, #192
+; DISABLE-NEXT:    blo .LBB0_14
+; DISABLE-NEXT:    b .LBB0_6
+; DISABLE-NEXT:  .LBB0_17:
+; DISABLE-NEXT:    mov r0, r3
+; DISABLE-NEXT:    pop {r11, pc}
 entry:
   %cmp = icmp sgt i32 %off, -1
   br i1 %cmp, label %while.cond.preheader, label %while.cond2.outer

Modified: llvm/trunk/test/CodeGen/ARM/arm-shrink-wrapping.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/arm-shrink-wrapping.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/arm-shrink-wrapping.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/arm-shrink-wrapping.ll Thu Jun 13 06:56:19 2019
@@ -1,11 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=armv7-apple-ios \
-; RUN:      | FileCheck %s --check-prefix=CHECK --check-prefix=ARM --check-prefix=ENABLE --check-prefix=ARM-ENABLE
+; RUN:      | FileCheck %s --check-prefix=ARM-ENABLE
 ; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=armv7-apple-ios \
-; RUN:      | FileCheck %s --check-prefix=CHECK --check-prefix=ARM --check-prefix=DISABLE --check-prefix=ARM-DISABLE
+; RUN:      | FileCheck %s --check-prefix=ARM-DISABLE
 ; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv7-apple-ios \
-; RUN:      | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB --check-prefix=ENABLE --check-prefix=THUMB-ENABLE
+; RUN:      | FileCheck %s --check-prefix=THUMB-ENABLE
 ; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv7-apple-ios \
-; RUN:      | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB --check-prefix=DISABLE --check-prefix=THUMB-DISABLE
+; RUN:      | FileCheck %s --check-prefix=THUMB-DISABLE
 
 ;
 ; Note: Lots of tests use inline asm instead of regular calls.
@@ -18,48 +19,111 @@
 ; the diffs.
 
 ; Initial motivating example: Simple diamond with a call just on one side.
-; CHECK-LABEL: foo:
+; foo:
 ;
 ; Compare the arguments and jump to exit.
 ; No prologue needed.
-; ENABLE: cmp r0, r1
-; ENABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]]
+; cmp r0, r1
+; bge [[EXIT_LABEL:LBB[0-9_]+]]
 ;
 ; Prologue code.
-; CHECK: push {r7, lr}
-; CHECK-NEXT: mov r7, sp
+; push {r7, lr}
+; mov r7, sp
 ;;
 ; Compare the arguments and jump to exit.
 ; After the prologue is set.
-; DISABLE: sub sp
-; DISABLE: cmp r0, r1
-; DISABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]]
+; sub sp
+; cmp r0, r1
+; bge [[EXIT_LABEL:LBB[0-9_]+]]
 ;
 ; Store %a in the alloca.
-; ARM-ENABLE: push {r0}
-; THUMB-ENABLE: str r0, [sp, #-4]
-; DISABLE: str r0, [sp]
+; push {r0}
+; str r0, [sp, #-4]
+; str r0, [sp]
 ; Set the alloca address in the second argument.
-; CHECK-NEXT: mov r1, sp
+; mov r1, sp
 ; Set the first argument to zero.
-; CHECK-NEXT: mov{{s?}} r0, #0
-; CHECK-NEXT: bl{{x?}} _doSomething
+; mov{{s?}} r0, #0
+; bl{{x?}} _doSomething
 ;
 ; With shrink-wrapping, epilogue is just after the call.
-; ARM-ENABLE-NEXT: mov sp, r7
-; THUMB-ENABLE-NEXT: add sp, #4
-; ENABLE-NEXT: pop{{(\.w)?}} {r7, lr}
+; mov sp, r7
+; add sp, #4
+; pop{{(\.w)?}} {r7, lr}
 ;
-; CHECK: [[EXIT_LABEL]]:
+; [[EXIT_LABEL]]:
 ;
 ; Without shrink-wrapping, epilogue is in the exit block.
 ; Epilogue code. (What we pop does not matter.)
-; ARM-DISABLE: mov sp, r7
-; THUMB-DISABLE: add sp, 
-; DISABLE-NEXT: pop {r7, pc}
+; mov sp, r7
+; add sp,
+; pop {r7, pc}
 ;
-; ENABLE-NEXT: bx lr
+; bx lr
 define i32 @foo(i32 %a, i32 %b) "no-frame-pointer-elim"="true" {
+; ARM-ENABLE-LABEL: foo:
+; ARM-ENABLE:       @ %bb.0:
+; ARM-ENABLE-NEXT:    cmp r0, r1
+; ARM-ENABLE-NEXT:    bge LBB0_2
+; ARM-ENABLE-NEXT:  @ %bb.1: @ %true
+; ARM-ENABLE-NEXT:    push {r7, lr}
+; ARM-ENABLE-NEXT:    mov r7, sp
+; ARM-ENABLE-NEXT:    push {r0}
+; ARM-ENABLE-NEXT:    mov r1, sp
+; ARM-ENABLE-NEXT:    mov r0, #0
+; ARM-ENABLE-NEXT:    bl _doSomething
+; ARM-ENABLE-NEXT:    mov sp, r7
+; ARM-ENABLE-NEXT:    pop {r7, lr}
+; ARM-ENABLE-NEXT:  LBB0_2: @ %false
+; ARM-ENABLE-NEXT:    bx lr
+;
+; ARM-DISABLE-LABEL: foo:
+; ARM-DISABLE:       @ %bb.0:
+; ARM-DISABLE-NEXT:    push {r7, lr}
+; ARM-DISABLE-NEXT:    mov r7, sp
+; ARM-DISABLE-NEXT:    sub sp, sp, #4
+; ARM-DISABLE-NEXT:    cmp r0, r1
+; ARM-DISABLE-NEXT:    bge LBB0_2
+; ARM-DISABLE-NEXT:  @ %bb.1: @ %true
+; ARM-DISABLE-NEXT:    str r0, [sp]
+; ARM-DISABLE-NEXT:    mov r1, sp
+; ARM-DISABLE-NEXT:    mov r0, #0
+; ARM-DISABLE-NEXT:    bl _doSomething
+; ARM-DISABLE-NEXT:  LBB0_2: @ %false
+; ARM-DISABLE-NEXT:    mov sp, r7
+; ARM-DISABLE-NEXT:    pop {r7, pc}
+;
+; THUMB-ENABLE-LABEL: foo:
+; THUMB-ENABLE:       @ %bb.0:
+; THUMB-ENABLE-NEXT:    cmp r0, r1
+; THUMB-ENABLE-NEXT:    bge LBB0_2
+; THUMB-ENABLE-NEXT:  @ %bb.1: @ %true
+; THUMB-ENABLE-NEXT:    push {r7, lr}
+; THUMB-ENABLE-NEXT:    mov r7, sp
+; THUMB-ENABLE-NEXT:    str r0, [sp, #-4]!
+; THUMB-ENABLE-NEXT:    mov r1, sp
+; THUMB-ENABLE-NEXT:    movs r0, #0
+; THUMB-ENABLE-NEXT:    bl _doSomething
+; THUMB-ENABLE-NEXT:    add sp, #4
+; THUMB-ENABLE-NEXT:    pop.w {r7, lr}
+; THUMB-ENABLE-NEXT:  LBB0_2: @ %false
+; THUMB-ENABLE-NEXT:    bx lr
+;
+; THUMB-DISABLE-LABEL: foo:
+; THUMB-DISABLE:       @ %bb.0:
+; THUMB-DISABLE-NEXT:    push {r7, lr}
+; THUMB-DISABLE-NEXT:    mov r7, sp
+; THUMB-DISABLE-NEXT:    sub sp, #4
+; THUMB-DISABLE-NEXT:    cmp r0, r1
+; THUMB-DISABLE-NEXT:    bge LBB0_2
+; THUMB-DISABLE-NEXT:  @ %bb.1: @ %true
+; THUMB-DISABLE-NEXT:    str r0, [sp]
+; THUMB-DISABLE-NEXT:    mov r1, sp
+; THUMB-DISABLE-NEXT:    movs r0, #0
+; THUMB-DISABLE-NEXT:    bl _doSomething
+; THUMB-DISABLE-NEXT:  LBB0_2: @ %false
+; THUMB-DISABLE-NEXT:    add sp, #4
+; THUMB-DISABLE-NEXT:    pop {r7, pc}
   %tmp = alloca i32, align 4
   %tmp2 = icmp slt i32 %a, %b
   br i1 %tmp2, label %true, label %false
@@ -80,51 +144,156 @@ declare i32 @doSomething(i32, i32*)
 
 ; Check that we do not perform the restore inside the loop whereas the save
 ; is outside.
-; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
+; freqSaveAndRestoreOutsideLoop:
 ;
 ; Shrink-wrapping allows to skip the prologue in the else case.
-; ARM-ENABLE: cmp r0, #0
-; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+; cmp r0, #0
+; beq [[ELSE_LABEL:LBB[0-9_]+]]
+; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
 ;
 ; Prologue code.
 ; Make sure we save the CSR used in the inline asm: r4.
-; CHECK: push {r4, r7, lr}
-; CHECK-NEXT: add r7, sp, #4
+; push {r4, r7, lr}
+; add r7, sp, #4
 ;
-; ARM-DISABLE: cmp r0, #0
-; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+; cmp r0, #0
+; beq [[ELSE_LABEL:LBB[0-9_]+]]
+; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
 ;
 ; SUM is in r0 because it is coalesced with the second
 ; argument on the else path.
-; CHECK: mov{{s?}} [[SUM:r0]], #0
-; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10
+; mov{{s?}} [[SUM:r0]], #0
+; mov{{s?}} [[IV:r[0-9]+]], #10
 ;
 ; Next BB.
-; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
-; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
-; ARM: add [[SUM]], [[TMP]], [[SUM]]
-; THUMB: add [[SUM]], [[TMP]]
-; ARM-NEXT: subs [[IV]], [[IV]], #1
-; THUMB-NEXT: subs [[IV]], #1
-; CHECK-NEXT: bne [[LOOP]]
+; [[LOOP:LBB[0-9_]+]]: @ %for.body
+; mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
+; add [[SUM]], [[TMP]], [[SUM]]
+; add [[SUM]], [[TMP]]
+; subs [[IV]], [[IV]], #1
+; subs [[IV]], #1
+; bne [[LOOP]]
 ;
 ; Next BB.
 ; SUM << 3.
-; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3
-; ENABLE-NEXT: pop {r4, r7, pc}
+; lsl{{s?}} [[SUM]], [[SUM]], #3
+; pop {r4, r7, pc}
 ;
 ; Duplicated epilogue.
-; DISABLE: pop {r4, r7, pc}
+; pop {r4, r7, pc}
 ;
-; CHECK: [[ELSE_LABEL]]: @ %if.else
+; [[ELSE_LABEL]]: @ %if.else
 ; Shift second argument by one and store into returned register.
-; CHECK: lsl{{s?}} r0, r1, #1
-; DISABLE-NEXT: pop {r4, r7, pc}
+; lsl{{s?}} r0, r1, #1
+; pop {r4, r7, pc}
 ;
-; ENABLE-NEXT: bx lr
+; bx lr
 define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" {
+; ARM-ENABLE-LABEL: freqSaveAndRestoreOutsideLoop:
+; ARM-ENABLE:       @ %bb.0: @ %entry
+; ARM-ENABLE-NEXT:    cmp r0, #0
+; ARM-ENABLE-NEXT:    beq LBB1_4
+; ARM-ENABLE-NEXT:  @ %bb.1: @ %for.preheader
+; ARM-ENABLE-NEXT:    push {r4, r7, lr}
+; ARM-ENABLE-NEXT:    add r7, sp, #4
+; ARM-ENABLE-NEXT:    mov r0, #0
+; ARM-ENABLE-NEXT:    mov r1, #10
+; ARM-ENABLE-NEXT:    @ InlineAsm Start
+; ARM-ENABLE-NEXT:    nop
+; ARM-ENABLE-NEXT:    @ InlineAsm End
+; ARM-ENABLE-NEXT:  LBB1_2: @ %for.body
+; ARM-ENABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-ENABLE-NEXT:    @ InlineAsm Start
+; ARM-ENABLE-NEXT:    mov r2, #1
+; ARM-ENABLE-NEXT:    @ InlineAsm End
+; ARM-ENABLE-NEXT:    add r0, r2, r0
+; ARM-ENABLE-NEXT:    subs r1, r1, #1
+; ARM-ENABLE-NEXT:    bne LBB1_2
+; ARM-ENABLE-NEXT:  @ %bb.3: @ %for.end
+; ARM-ENABLE-NEXT:    lsl r0, r0, #3
+; ARM-ENABLE-NEXT:    pop {r4, r7, pc}
+; ARM-ENABLE-NEXT:  LBB1_4: @ %if.else
+; ARM-ENABLE-NEXT:    lsl r0, r1, #1
+; ARM-ENABLE-NEXT:    bx lr
+;
+; ARM-DISABLE-LABEL: freqSaveAndRestoreOutsideLoop:
+; ARM-DISABLE:       @ %bb.0: @ %entry
+; ARM-DISABLE-NEXT:    push {r4, r7, lr}
+; ARM-DISABLE-NEXT:    add r7, sp, #4
+; ARM-DISABLE-NEXT:    cmp r0, #0
+; ARM-DISABLE-NEXT:    beq LBB1_4
+; ARM-DISABLE-NEXT:  @ %bb.1: @ %for.preheader
+; ARM-DISABLE-NEXT:    mov r0, #0
+; ARM-DISABLE-NEXT:    mov r1, #10
+; ARM-DISABLE-NEXT:    @ InlineAsm Start
+; ARM-DISABLE-NEXT:    nop
+; ARM-DISABLE-NEXT:    @ InlineAsm End
+; ARM-DISABLE-NEXT:  LBB1_2: @ %for.body
+; ARM-DISABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-DISABLE-NEXT:    @ InlineAsm Start
+; ARM-DISABLE-NEXT:    mov r2, #1
+; ARM-DISABLE-NEXT:    @ InlineAsm End
+; ARM-DISABLE-NEXT:    add r0, r2, r0
+; ARM-DISABLE-NEXT:    subs r1, r1, #1
+; ARM-DISABLE-NEXT:    bne LBB1_2
+; ARM-DISABLE-NEXT:  @ %bb.3: @ %for.end
+; ARM-DISABLE-NEXT:    lsl r0, r0, #3
+; ARM-DISABLE-NEXT:    pop {r4, r7, pc}
+; ARM-DISABLE-NEXT:  LBB1_4: @ %if.else
+; ARM-DISABLE-NEXT:    lsl r0, r1, #1
+; ARM-DISABLE-NEXT:    pop {r4, r7, pc}
+;
+; THUMB-ENABLE-LABEL: freqSaveAndRestoreOutsideLoop:
+; THUMB-ENABLE:       @ %bb.0: @ %entry
+; THUMB-ENABLE-NEXT:    cbz r0, LBB1_4
+; THUMB-ENABLE-NEXT:  @ %bb.1: @ %for.preheader
+; THUMB-ENABLE-NEXT:    push {r4, r7, lr}
+; THUMB-ENABLE-NEXT:    add r7, sp, #4
+; THUMB-ENABLE-NEXT:    movs r0, #0
+; THUMB-ENABLE-NEXT:    movs r1, #10
+; THUMB-ENABLE-NEXT:    @ InlineAsm Start
+; THUMB-ENABLE-NEXT:    nop
+; THUMB-ENABLE-NEXT:    @ InlineAsm End
+; THUMB-ENABLE-NEXT:  LBB1_2: @ %for.body
+; THUMB-ENABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-ENABLE-NEXT:    @ InlineAsm Start
+; THUMB-ENABLE-NEXT:    mov.w r2, #1
+; THUMB-ENABLE-NEXT:    @ InlineAsm End
+; THUMB-ENABLE-NEXT:    add r0, r2
+; THUMB-ENABLE-NEXT:    subs r1, #1
+; THUMB-ENABLE-NEXT:    bne LBB1_2
+; THUMB-ENABLE-NEXT:  @ %bb.3: @ %for.end
+; THUMB-ENABLE-NEXT:    lsls r0, r0, #3
+; THUMB-ENABLE-NEXT:    pop {r4, r7, pc}
+; THUMB-ENABLE-NEXT:  LBB1_4: @ %if.else
+; THUMB-ENABLE-NEXT:    lsls r0, r1, #1
+; THUMB-ENABLE-NEXT:    bx lr
+;
+; THUMB-DISABLE-LABEL: freqSaveAndRestoreOutsideLoop:
+; THUMB-DISABLE:       @ %bb.0: @ %entry
+; THUMB-DISABLE-NEXT:    push {r4, r7, lr}
+; THUMB-DISABLE-NEXT:    add r7, sp, #4
+; THUMB-DISABLE-NEXT:    cbz r0, LBB1_4
+; THUMB-DISABLE-NEXT:  @ %bb.1: @ %for.preheader
+; THUMB-DISABLE-NEXT:    movs r0, #0
+; THUMB-DISABLE-NEXT:    movs r1, #10
+; THUMB-DISABLE-NEXT:    @ InlineAsm Start
+; THUMB-DISABLE-NEXT:    nop
+; THUMB-DISABLE-NEXT:    @ InlineAsm End
+; THUMB-DISABLE-NEXT:  LBB1_2: @ %for.body
+; THUMB-DISABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-DISABLE-NEXT:    @ InlineAsm Start
+; THUMB-DISABLE-NEXT:    mov.w r2, #1
+; THUMB-DISABLE-NEXT:    @ InlineAsm End
+; THUMB-DISABLE-NEXT:    add r0, r2
+; THUMB-DISABLE-NEXT:    subs r1, #1
+; THUMB-DISABLE-NEXT:    bne LBB1_2
+; THUMB-DISABLE-NEXT:  @ %bb.3: @ %for.end
+; THUMB-DISABLE-NEXT:    lsls r0, r0, #3
+; THUMB-DISABLE-NEXT:    pop {r4, r7, pc}
+; THUMB-DISABLE-NEXT:  LBB1_4: @ %if.else
+; THUMB-DISABLE-NEXT:    lsls r0, r1, #1
+; THUMB-DISABLE-NEXT:    pop {r4, r7, pc}
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %for.preheader
@@ -159,26 +328,162 @@ declare i32 @something(...)
 
 ; Check that we do not perform the shrink-wrapping inside the loop even
 ; though that would be legal. The cost model must prevent that.
-; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
+; freqSaveAndRestoreOutsideLoop2:
 ; Prologue code.
 ; Make sure we save the CSR used in the inline asm: r4.
-; CHECK: push {r4
-; CHECK: mov{{s?}} [[SUM:r0]], #0
-; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10
-; CHECK: nop
+; push {r4
+; mov{{s?}} [[SUM:r0]], #0
+; mov{{s?}} [[IV:r[0-9]+]], #10
+; nop
 ; Next BB.
-; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: @ %for.body
-; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
-; ARM: add [[SUM]], [[TMP]], [[SUM]]
-; THUMB: add [[SUM]], [[TMP]]
-; ARM: subs [[IV]], [[IV]], #1
-; THUMB: subs [[IV]], #1
-; CHECK-NEXT: bne [[LOOP_LABEL]]
+; [[LOOP_LABEL:LBB[0-9_]+]]: @ %for.body
+; mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
+; add [[SUM]], [[TMP]], [[SUM]]
+; add [[SUM]], [[TMP]]
+; subs [[IV]], [[IV]], #1
+; subs [[IV]], #1
+; bne [[LOOP_LABEL]]
 ; Next BB.
-; CHECK: @ %for.exit
-; CHECK: nop
-; CHECK: pop {r4
+; @ %for.exit
+; nop
+; pop {r4
 define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) "no-frame-pointer-elim"="true" {
+; ARM-LABEL: freqSaveAndRestoreOutsideLoop2:
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    push {r4, r7, lr}
+; ARM-NEXT:    add r7, sp, #4
+; ARM-NEXT:    mov r0, #0
+; ARM-NEXT:    mov r1, #10
+; ARM-NEXT:    @ InlineAsm Start
+; ARM-NEXT:    nop
+; ARM-NEXT:    @ InlineAsm End
+; ARM-NEXT:  LBB2_1: @ %for.body
+; ARM-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-NEXT:    @ InlineAsm Start
+; ARM-NEXT:    mov r2, #1
+; ARM-NEXT:    @ InlineAsm End
+; ARM-NEXT:    add r0, r2, r0
+; ARM-NEXT:    subs r1, r1, #1
+; ARM-NEXT:    bne LBB2_1
+; ARM-NEXT:  @ %bb.2: @ %for.exit
+; ARM-NEXT:    @ InlineAsm Start
+; ARM-NEXT:    nop
+; ARM-NEXT:    @ InlineAsm End
+; ARM-NEXT:    pop {r4, r7, pc}
+;
+; THUMB-LABEL: freqSaveAndRestoreOutsideLoop2:
+; THUMB:       @ %bb.0: @ %entry
+; THUMB-NEXT:    push {r4, r7, lr}
+; THUMB-NEXT:    add r7, sp, #4
+; THUMB-NEXT:    movs r0, #0
+; THUMB-NEXT:    movs r1, #10
+; THUMB-NEXT:    @ InlineAsm Start
+; THUMB-NEXT:    nop
+; THUMB-NEXT:    @ InlineAsm End
+; THUMB-NEXT:  LBB2_1: @ %for.body
+; THUMB-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-NEXT:    @ InlineAsm Start
+; THUMB-NEXT:    mov.w r2, #1
+; THUMB-NEXT:    @ InlineAsm End
+; THUMB-NEXT:    add r0, r2
+; THUMB-NEXT:    subs r1, #1
+; THUMB-NEXT:    bne LBB2_1
+; THUMB-NEXT:  @ %bb.2: @ %for.exit
+; THUMB-NEXT:    @ InlineAsm Start
+; THUMB-NEXT:    nop
+; THUMB-NEXT:    @ InlineAsm End
+; THUMB-NEXT:    pop {r4, r7, pc}
+; ARM-ENABLE-LABEL: freqSaveAndRestoreOutsideLoop2:
+; ARM-ENABLE:       @ %bb.0: @ %entry
+; ARM-ENABLE-NEXT:    push {r4, r7, lr}
+; ARM-ENABLE-NEXT:    add r7, sp, #4
+; ARM-ENABLE-NEXT:    mov r0, #0
+; ARM-ENABLE-NEXT:    mov r1, #10
+; ARM-ENABLE-NEXT:    @ InlineAsm Start
+; ARM-ENABLE-NEXT:    nop
+; ARM-ENABLE-NEXT:    @ InlineAsm End
+; ARM-ENABLE-NEXT:  LBB2_1: @ %for.body
+; ARM-ENABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-ENABLE-NEXT:    @ InlineAsm Start
+; ARM-ENABLE-NEXT:    mov r2, #1
+; ARM-ENABLE-NEXT:    @ InlineAsm End
+; ARM-ENABLE-NEXT:    add r0, r2, r0
+; ARM-ENABLE-NEXT:    subs r1, r1, #1
+; ARM-ENABLE-NEXT:    bne LBB2_1
+; ARM-ENABLE-NEXT:  @ %bb.2: @ %for.exit
+; ARM-ENABLE-NEXT:    @ InlineAsm Start
+; ARM-ENABLE-NEXT:    nop
+; ARM-ENABLE-NEXT:    @ InlineAsm End
+; ARM-ENABLE-NEXT:    pop {r4, r7, pc}
+;
+; ARM-DISABLE-LABEL: freqSaveAndRestoreOutsideLoop2:
+; ARM-DISABLE:       @ %bb.0: @ %entry
+; ARM-DISABLE-NEXT:    push {r4, r7, lr}
+; ARM-DISABLE-NEXT:    add r7, sp, #4
+; ARM-DISABLE-NEXT:    mov r0, #0
+; ARM-DISABLE-NEXT:    mov r1, #10
+; ARM-DISABLE-NEXT:    @ InlineAsm Start
+; ARM-DISABLE-NEXT:    nop
+; ARM-DISABLE-NEXT:    @ InlineAsm End
+; ARM-DISABLE-NEXT:  LBB2_1: @ %for.body
+; ARM-DISABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-DISABLE-NEXT:    @ InlineAsm Start
+; ARM-DISABLE-NEXT:    mov r2, #1
+; ARM-DISABLE-NEXT:    @ InlineAsm End
+; ARM-DISABLE-NEXT:    add r0, r2, r0
+; ARM-DISABLE-NEXT:    subs r1, r1, #1
+; ARM-DISABLE-NEXT:    bne LBB2_1
+; ARM-DISABLE-NEXT:  @ %bb.2: @ %for.exit
+; ARM-DISABLE-NEXT:    @ InlineAsm Start
+; ARM-DISABLE-NEXT:    nop
+; ARM-DISABLE-NEXT:    @ InlineAsm End
+; ARM-DISABLE-NEXT:    pop {r4, r7, pc}
+;
+; THUMB-ENABLE-LABEL: freqSaveAndRestoreOutsideLoop2:
+; THUMB-ENABLE:       @ %bb.0: @ %entry
+; THUMB-ENABLE-NEXT:    push {r4, r7, lr}
+; THUMB-ENABLE-NEXT:    add r7, sp, #4
+; THUMB-ENABLE-NEXT:    movs r0, #0
+; THUMB-ENABLE-NEXT:    movs r1, #10
+; THUMB-ENABLE-NEXT:    @ InlineAsm Start
+; THUMB-ENABLE-NEXT:    nop
+; THUMB-ENABLE-NEXT:    @ InlineAsm End
+; THUMB-ENABLE-NEXT:  LBB2_1: @ %for.body
+; THUMB-ENABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-ENABLE-NEXT:    @ InlineAsm Start
+; THUMB-ENABLE-NEXT:    mov.w r2, #1
+; THUMB-ENABLE-NEXT:    @ InlineAsm End
+; THUMB-ENABLE-NEXT:    add r0, r2
+; THUMB-ENABLE-NEXT:    subs r1, #1
+; THUMB-ENABLE-NEXT:    bne LBB2_1
+; THUMB-ENABLE-NEXT:  @ %bb.2: @ %for.exit
+; THUMB-ENABLE-NEXT:    @ InlineAsm Start
+; THUMB-ENABLE-NEXT:    nop
+; THUMB-ENABLE-NEXT:    @ InlineAsm End
+; THUMB-ENABLE-NEXT:    pop {r4, r7, pc}
+;
+; THUMB-DISABLE-LABEL: freqSaveAndRestoreOutsideLoop2:
+; THUMB-DISABLE:       @ %bb.0: @ %entry
+; THUMB-DISABLE-NEXT:    push {r4, r7, lr}
+; THUMB-DISABLE-NEXT:    add r7, sp, #4
+; THUMB-DISABLE-NEXT:    movs r0, #0
+; THUMB-DISABLE-NEXT:    movs r1, #10
+; THUMB-DISABLE-NEXT:    @ InlineAsm Start
+; THUMB-DISABLE-NEXT:    nop
+; THUMB-DISABLE-NEXT:    @ InlineAsm End
+; THUMB-DISABLE-NEXT:  LBB2_1: @ %for.body
+; THUMB-DISABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-DISABLE-NEXT:    @ InlineAsm Start
+; THUMB-DISABLE-NEXT:    mov.w r2, #1
+; THUMB-DISABLE-NEXT:    @ InlineAsm End
+; THUMB-DISABLE-NEXT:    add r0, r2
+; THUMB-DISABLE-NEXT:    subs r1, #1
+; THUMB-DISABLE-NEXT:    bne LBB2_1
+; THUMB-DISABLE-NEXT:  @ %bb.2: @ %for.exit
+; THUMB-DISABLE-NEXT:    @ InlineAsm Start
+; THUMB-DISABLE-NEXT:    nop
+; THUMB-DISABLE-NEXT:    @ InlineAsm End
+; THUMB-DISABLE-NEXT:    pop {r4, r7, pc}
 entry:
   br label %for.preheader
 
@@ -205,50 +510,167 @@ for.end:
 
 ; Check with a more complex case that we do not have save within the loop and
 ; restore outside.
-; CHECK-LABEL: loopInfoSaveOutsideLoop:
+; loopInfoSaveOutsideLoop:
 ;
-; ARM-ENABLE: cmp r0, #0
-; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+; cmp r0, #0
+; beq [[ELSE_LABEL:LBB[0-9_]+]]
+; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
 ;
 ; Prologue code.
 ; Make sure we save the CSR used in the inline asm: r4.
-; CHECK: push {r4, r7, lr}
-; CHECK-NEXT: add r7, sp, #4
+; push {r4, r7, lr}
+; add r7, sp, #4
 ;
-; ARM-DISABLE: cmp r0, #0
-; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+; cmp r0, #0
+; beq [[ELSE_LABEL:LBB[0-9_]+]]
+; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
 ;
 ; SUM is in r0 because it is coalesced with the second
 ; argument on the else path.
-; CHECK: mov{{s?}} [[SUM:r0]], #0
-; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10
+; mov{{s?}} [[SUM:r0]], #0
+; mov{{s?}} [[IV:r[0-9]+]], #10
 ;
 ; Next BB.
-; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
-; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
-; ARM: add [[SUM]], [[TMP]], [[SUM]]
-; THUMB: add [[SUM]], [[TMP]]
-; ARM-NEXT: subs [[IV]], [[IV]], #1
-; THUMB-NEXT: subs [[IV]], #1
-; CHECK-NEXT: bne [[LOOP]]
+; [[LOOP:LBB[0-9_]+]]: @ %for.body
+; mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
+; add [[SUM]], [[TMP]], [[SUM]]
+; add [[SUM]], [[TMP]]
+; subs [[IV]], [[IV]], #1
+; subs [[IV]], #1
+; bne [[LOOP]]
 ;
 ; Next BB.
 ; SUM << 3.
-; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3
-; ENABLE: pop {r4, r7, pc}
+; lsl{{s?}} [[SUM]], [[SUM]], #3
+; pop {r4, r7, pc}
 ;
 ; Duplicated epilogue.
-; DISABLE: pop {r4, r7, pc}
+; pop {r4, r7, pc}
 ;
-; CHECK: [[ELSE_LABEL]]: @ %if.else
+; [[ELSE_LABEL]]: @ %if.else
 ; Shift second argument by one and store into returned register.
-; CHECK: lsl{{s?}} r0, r1, #1
-; DISABLE-NEXT: pop {r4, r7, pc}
+; lsl{{s?}} r0, r1, #1
+; pop {r4, r7, pc}
 ;
-; ENABLE-NEXT: bx lr
+; bx lr
 define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" {
+; ARM-ENABLE-LABEL: loopInfoSaveOutsideLoop:
+; ARM-ENABLE:       @ %bb.0: @ %entry
+; ARM-ENABLE-NEXT:    cmp r0, #0
+; ARM-ENABLE-NEXT:    beq LBB3_4
+; ARM-ENABLE-NEXT:  @ %bb.1: @ %for.preheader
+; ARM-ENABLE-NEXT:    push {r4, r7, lr}
+; ARM-ENABLE-NEXT:    add r7, sp, #4
+; ARM-ENABLE-NEXT:    mov r0, #0
+; ARM-ENABLE-NEXT:    mov r1, #10
+; ARM-ENABLE-NEXT:    @ InlineAsm Start
+; ARM-ENABLE-NEXT:    nop
+; ARM-ENABLE-NEXT:    @ InlineAsm End
+; ARM-ENABLE-NEXT:  LBB3_2: @ %for.body
+; ARM-ENABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-ENABLE-NEXT:    @ InlineAsm Start
+; ARM-ENABLE-NEXT:    mov r2, #1
+; ARM-ENABLE-NEXT:    @ InlineAsm End
+; ARM-ENABLE-NEXT:    add r0, r2, r0
+; ARM-ENABLE-NEXT:    subs r1, r1, #1
+; ARM-ENABLE-NEXT:    bne LBB3_2
+; ARM-ENABLE-NEXT:  @ %bb.3: @ %for.end
+; ARM-ENABLE-NEXT:    lsl r0, r0, #3
+; ARM-ENABLE-NEXT:    @ InlineAsm Start
+; ARM-ENABLE-NEXT:    nop
+; ARM-ENABLE-NEXT:    @ InlineAsm End
+; ARM-ENABLE-NEXT:    pop {r4, r7, pc}
+; ARM-ENABLE-NEXT:  LBB3_4: @ %if.else
+; ARM-ENABLE-NEXT:    lsl r0, r1, #1
+; ARM-ENABLE-NEXT:    bx lr
+;
+; ARM-DISABLE-LABEL: loopInfoSaveOutsideLoop:
+; ARM-DISABLE:       @ %bb.0: @ %entry
+; ARM-DISABLE-NEXT:    push {r4, r7, lr}
+; ARM-DISABLE-NEXT:    add r7, sp, #4
+; ARM-DISABLE-NEXT:    cmp r0, #0
+; ARM-DISABLE-NEXT:    beq LBB3_4
+; ARM-DISABLE-NEXT:  @ %bb.1: @ %for.preheader
+; ARM-DISABLE-NEXT:    mov r0, #0
+; ARM-DISABLE-NEXT:    mov r1, #10
+; ARM-DISABLE-NEXT:    @ InlineAsm Start
+; ARM-DISABLE-NEXT:    nop
+; ARM-DISABLE-NEXT:    @ InlineAsm End
+; ARM-DISABLE-NEXT:  LBB3_2: @ %for.body
+; ARM-DISABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-DISABLE-NEXT:    @ InlineAsm Start
+; ARM-DISABLE-NEXT:    mov r2, #1
+; ARM-DISABLE-NEXT:    @ InlineAsm End
+; ARM-DISABLE-NEXT:    add r0, r2, r0
+; ARM-DISABLE-NEXT:    subs r1, r1, #1
+; ARM-DISABLE-NEXT:    bne LBB3_2
+; ARM-DISABLE-NEXT:  @ %bb.3: @ %for.end
+; ARM-DISABLE-NEXT:    lsl r0, r0, #3
+; ARM-DISABLE-NEXT:    @ InlineAsm Start
+; ARM-DISABLE-NEXT:    nop
+; ARM-DISABLE-NEXT:    @ InlineAsm End
+; ARM-DISABLE-NEXT:    pop {r4, r7, pc}
+; ARM-DISABLE-NEXT:  LBB3_4: @ %if.else
+; ARM-DISABLE-NEXT:    lsl r0, r1, #1
+; ARM-DISABLE-NEXT:    pop {r4, r7, pc}
+;
+; THUMB-ENABLE-LABEL: loopInfoSaveOutsideLoop:
+; THUMB-ENABLE:       @ %bb.0: @ %entry
+; THUMB-ENABLE-NEXT:    cbz r0, LBB3_4
+; THUMB-ENABLE-NEXT:  @ %bb.1: @ %for.preheader
+; THUMB-ENABLE-NEXT:    push {r4, r7, lr}
+; THUMB-ENABLE-NEXT:    add r7, sp, #4
+; THUMB-ENABLE-NEXT:    movs r0, #0
+; THUMB-ENABLE-NEXT:    movs r1, #10
+; THUMB-ENABLE-NEXT:    @ InlineAsm Start
+; THUMB-ENABLE-NEXT:    nop
+; THUMB-ENABLE-NEXT:    @ InlineAsm End
+; THUMB-ENABLE-NEXT:  LBB3_2: @ %for.body
+; THUMB-ENABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-ENABLE-NEXT:    @ InlineAsm Start
+; THUMB-ENABLE-NEXT:    mov.w r2, #1
+; THUMB-ENABLE-NEXT:    @ InlineAsm End
+; THUMB-ENABLE-NEXT:    add r0, r2
+; THUMB-ENABLE-NEXT:    subs r1, #1
+; THUMB-ENABLE-NEXT:    bne LBB3_2
+; THUMB-ENABLE-NEXT:  @ %bb.3: @ %for.end
+; THUMB-ENABLE-NEXT:    lsls r0, r0, #3
+; THUMB-ENABLE-NEXT:    @ InlineAsm Start
+; THUMB-ENABLE-NEXT:    nop
+; THUMB-ENABLE-NEXT:    @ InlineAsm End
+; THUMB-ENABLE-NEXT:    pop {r4, r7, pc}
+; THUMB-ENABLE-NEXT:  LBB3_4: @ %if.else
+; THUMB-ENABLE-NEXT:    lsls r0, r1, #1
+; THUMB-ENABLE-NEXT:    bx lr
+;
+; THUMB-DISABLE-LABEL: loopInfoSaveOutsideLoop:
+; THUMB-DISABLE:       @ %bb.0: @ %entry
+; THUMB-DISABLE-NEXT:    push {r4, r7, lr}
+; THUMB-DISABLE-NEXT:    add r7, sp, #4
+; THUMB-DISABLE-NEXT:    cbz r0, LBB3_4
+; THUMB-DISABLE-NEXT:  @ %bb.1: @ %for.preheader
+; THUMB-DISABLE-NEXT:    movs r0, #0
+; THUMB-DISABLE-NEXT:    movs r1, #10
+; THUMB-DISABLE-NEXT:    @ InlineAsm Start
+; THUMB-DISABLE-NEXT:    nop
+; THUMB-DISABLE-NEXT:    @ InlineAsm End
+; THUMB-DISABLE-NEXT:  LBB3_2: @ %for.body
+; THUMB-DISABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-DISABLE-NEXT:    @ InlineAsm Start
+; THUMB-DISABLE-NEXT:    mov.w r2, #1
+; THUMB-DISABLE-NEXT:    @ InlineAsm End
+; THUMB-DISABLE-NEXT:    add r0, r2
+; THUMB-DISABLE-NEXT:    subs r1, #1
+; THUMB-DISABLE-NEXT:    bne LBB3_2
+; THUMB-DISABLE-NEXT:  @ %bb.3: @ %for.end
+; THUMB-DISABLE-NEXT:    lsls r0, r0, #3
+; THUMB-DISABLE-NEXT:    @ InlineAsm Start
+; THUMB-DISABLE-NEXT:    nop
+; THUMB-DISABLE-NEXT:    @ InlineAsm End
+; THUMB-DISABLE-NEXT:    pop {r4, r7, pc}
+; THUMB-DISABLE-NEXT:  LBB3_4: @ %if.else
+; THUMB-DISABLE-NEXT:    lsls r0, r1, #1
+; THUMB-DISABLE-NEXT:    pop {r4, r7, pc}
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %for.preheader
@@ -284,50 +706,155 @@ declare void @somethingElse(...)
 
 ; Check with a more complex case that we do not have restore within the loop and
 ; save outside.
-; CHECK-LABEL: loopInfoRestoreOutsideLoop:
+; loopInfoRestoreOutsideLoop:
 ;
-; ARM-ENABLE: cmp r0, #0
-; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+; cmp r0, #0
+; beq [[ELSE_LABEL:LBB[0-9_]+]]
+; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
 ;
 ; Prologue code.
 ; Make sure we save the CSR used in the inline asm: r4.
-; CHECK: push {r4, r7, lr}
-; CHECK-NEXT: add r7, sp, #4
+; push {r4, r7, lr}
+; add r7, sp, #4
 ;
-; ARM-DISABLE: cmp r0, #0
-; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+; cmp r0, #0
+; beq [[ELSE_LABEL:LBB[0-9_]+]]
+; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
 ;
 ; SUM is in r0 because it is coalesced with the second
 ; argument on the else path.
-; CHECK: mov{{s?}} [[SUM:r0]], #0
-; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10
+; mov{{s?}} [[SUM:r0]], #0
+; mov{{s?}} [[IV:r[0-9]+]], #10
 ;
 ; Next BB.
-; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
-; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
-; ARM: add [[SUM]], [[TMP]], [[SUM]]
-; THUMB: add [[SUM]], [[TMP]]
-; ARM-NEXT: subs [[IV]], [[IV]], #1
-; THUMB-NEXT: subs [[IV]], #1
-; CHECK-NEXT: bne [[LOOP]]
+; [[LOOP:LBB[0-9_]+]]: @ %for.body
+; mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
+; add [[SUM]], [[TMP]], [[SUM]]
+; add [[SUM]], [[TMP]]
+; subs [[IV]], [[IV]], #1
+; subs [[IV]], #1
+; bne [[LOOP]]
 ;
 ; Next BB.
 ; SUM << 3.
-; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3
-; ENABLE-NEXT: pop {r4, r7, pc}
+; lsl{{s?}} [[SUM]], [[SUM]], #3
+; pop {r4, r7, pc}
 ;
 ; Duplicated epilogue.
-; DISABLE: pop {r4, r7, pc}
+; pop {r4, r7, pc}
 ;
-; CHECK: [[ELSE_LABEL]]: @ %if.else
+; [[ELSE_LABEL]]: @ %if.else
 ; Shift second argument by one and store into returned register.
-; CHECK: lsl{{s?}} r0, r1, #1
-; DISABLE-NEXT: pop {r4, r7, pc}
+; lsl{{s?}} r0, r1, #1
+; pop {r4, r7, pc}
 ;
-; ENABLE-NEXT: bx lr
+; bx lr
 define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" nounwind {
+; ARM-ENABLE-LABEL: loopInfoRestoreOutsideLoop:
+; ARM-ENABLE:       @ %bb.0: @ %entry
+; ARM-ENABLE-NEXT:    cmp r0, #0
+; ARM-ENABLE-NEXT:    beq LBB4_4
+; ARM-ENABLE-NEXT:  @ %bb.1: @ %if.then
+; ARM-ENABLE-NEXT:    push {r4, r7, lr}
+; ARM-ENABLE-NEXT:    add r7, sp, #4
+; ARM-ENABLE-NEXT:    mov r0, #0
+; ARM-ENABLE-NEXT:    mov r1, #10
+; ARM-ENABLE-NEXT:    @ InlineAsm Start
+; ARM-ENABLE-NEXT:    nop
+; ARM-ENABLE-NEXT:    @ InlineAsm End
+; ARM-ENABLE-NEXT:  LBB4_2: @ %for.body
+; ARM-ENABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-ENABLE-NEXT:    @ InlineAsm Start
+; ARM-ENABLE-NEXT:    mov r2, #1
+; ARM-ENABLE-NEXT:    @ InlineAsm End
+; ARM-ENABLE-NEXT:    add r0, r2, r0
+; ARM-ENABLE-NEXT:    subs r1, r1, #1
+; ARM-ENABLE-NEXT:    bne LBB4_2
+; ARM-ENABLE-NEXT:  @ %bb.3: @ %for.end
+; ARM-ENABLE-NEXT:    lsl r0, r0, #3
+; ARM-ENABLE-NEXT:    pop {r4, r7, pc}
+; ARM-ENABLE-NEXT:  LBB4_4: @ %if.else
+; ARM-ENABLE-NEXT:    lsl r0, r1, #1
+; ARM-ENABLE-NEXT:    bx lr
+;
+; ARM-DISABLE-LABEL: loopInfoRestoreOutsideLoop:
+; ARM-DISABLE:       @ %bb.0: @ %entry
+; ARM-DISABLE-NEXT:    push {r4, r7, lr}
+; ARM-DISABLE-NEXT:    add r7, sp, #4
+; ARM-DISABLE-NEXT:    cmp r0, #0
+; ARM-DISABLE-NEXT:    beq LBB4_4
+; ARM-DISABLE-NEXT:  @ %bb.1: @ %if.then
+; ARM-DISABLE-NEXT:    mov r0, #0
+; ARM-DISABLE-NEXT:    mov r1, #10
+; ARM-DISABLE-NEXT:    @ InlineAsm Start
+; ARM-DISABLE-NEXT:    nop
+; ARM-DISABLE-NEXT:    @ InlineAsm End
+; ARM-DISABLE-NEXT:  LBB4_2: @ %for.body
+; ARM-DISABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-DISABLE-NEXT:    @ InlineAsm Start
+; ARM-DISABLE-NEXT:    mov r2, #1
+; ARM-DISABLE-NEXT:    @ InlineAsm End
+; ARM-DISABLE-NEXT:    add r0, r2, r0
+; ARM-DISABLE-NEXT:    subs r1, r1, #1
+; ARM-DISABLE-NEXT:    bne LBB4_2
+; ARM-DISABLE-NEXT:  @ %bb.3: @ %for.end
+; ARM-DISABLE-NEXT:    lsl r0, r0, #3
+; ARM-DISABLE-NEXT:    pop {r4, r7, pc}
+; ARM-DISABLE-NEXT:  LBB4_4: @ %if.else
+; ARM-DISABLE-NEXT:    lsl r0, r1, #1
+; ARM-DISABLE-NEXT:    pop {r4, r7, pc}
+;
+; THUMB-ENABLE-LABEL: loopInfoRestoreOutsideLoop:
+; THUMB-ENABLE:       @ %bb.0: @ %entry
+; THUMB-ENABLE-NEXT:    cbz r0, LBB4_4
+; THUMB-ENABLE-NEXT:  @ %bb.1: @ %if.then
+; THUMB-ENABLE-NEXT:    push {r4, r7, lr}
+; THUMB-ENABLE-NEXT:    add r7, sp, #4
+; THUMB-ENABLE-NEXT:    movs r0, #0
+; THUMB-ENABLE-NEXT:    movs r1, #10
+; THUMB-ENABLE-NEXT:    @ InlineAsm Start
+; THUMB-ENABLE-NEXT:    nop
+; THUMB-ENABLE-NEXT:    @ InlineAsm End
+; THUMB-ENABLE-NEXT:  LBB4_2: @ %for.body
+; THUMB-ENABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-ENABLE-NEXT:    @ InlineAsm Start
+; THUMB-ENABLE-NEXT:    mov.w r2, #1
+; THUMB-ENABLE-NEXT:    @ InlineAsm End
+; THUMB-ENABLE-NEXT:    add r0, r2
+; THUMB-ENABLE-NEXT:    subs r1, #1
+; THUMB-ENABLE-NEXT:    bne LBB4_2
+; THUMB-ENABLE-NEXT:  @ %bb.3: @ %for.end
+; THUMB-ENABLE-NEXT:    lsls r0, r0, #3
+; THUMB-ENABLE-NEXT:    pop {r4, r7, pc}
+; THUMB-ENABLE-NEXT:  LBB4_4: @ %if.else
+; THUMB-ENABLE-NEXT:    lsls r0, r1, #1
+; THUMB-ENABLE-NEXT:    bx lr
+;
+; THUMB-DISABLE-LABEL: loopInfoRestoreOutsideLoop:
+; THUMB-DISABLE:       @ %bb.0: @ %entry
+; THUMB-DISABLE-NEXT:    push {r4, r7, lr}
+; THUMB-DISABLE-NEXT:    add r7, sp, #4
+; THUMB-DISABLE-NEXT:    cbz r0, LBB4_4
+; THUMB-DISABLE-NEXT:  @ %bb.1: @ %if.then
+; THUMB-DISABLE-NEXT:    movs r0, #0
+; THUMB-DISABLE-NEXT:    movs r1, #10
+; THUMB-DISABLE-NEXT:    @ InlineAsm Start
+; THUMB-DISABLE-NEXT:    nop
+; THUMB-DISABLE-NEXT:    @ InlineAsm End
+; THUMB-DISABLE-NEXT:  LBB4_2: @ %for.body
+; THUMB-DISABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-DISABLE-NEXT:    @ InlineAsm Start
+; THUMB-DISABLE-NEXT:    mov.w r2, #1
+; THUMB-DISABLE-NEXT:    @ InlineAsm End
+; THUMB-DISABLE-NEXT:    add r0, r2
+; THUMB-DISABLE-NEXT:    subs r1, #1
+; THUMB-DISABLE-NEXT:    bne LBB4_2
+; THUMB-DISABLE-NEXT:  @ %bb.3: @ %for.end
+; THUMB-DISABLE-NEXT:    lsls r0, r0, #3
+; THUMB-DISABLE-NEXT:    pop {r4, r7, pc}
+; THUMB-DISABLE-NEXT:  LBB4_4: @ %if.else
+; THUMB-DISABLE-NEXT:    lsls r0, r1, #1
+; THUMB-DISABLE-NEXT:    pop {r4, r7, pc}
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %if.then
@@ -359,53 +886,190 @@ if.end:
 }
 
 ; Check that we handle function with no frame information correctly.
-; CHECK-LABEL: emptyFrame:
-; CHECK: @ %entry
-; CHECK-NEXT: mov{{s?}} r0, #0
-; CHECK-NEXT: bx lr
+; emptyFrame:
+; @ %entry
+; mov{{s?}} r0, #0
+; bx lr
 define i32 @emptyFrame() {
+; ARM-LABEL: emptyFrame:
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    mov r0, #0
+; ARM-NEXT:    bx lr
+;
+; THUMB-LABEL: emptyFrame:
+; THUMB:       @ %bb.0: @ %entry
+; THUMB-NEXT:    movs r0, #0
+; THUMB-NEXT:    bx lr
+; ARM-ENABLE-LABEL: emptyFrame:
+; ARM-ENABLE:       @ %bb.0: @ %entry
+; ARM-ENABLE-NEXT:    mov r0, #0
+; ARM-ENABLE-NEXT:    bx lr
+;
+; ARM-DISABLE-LABEL: emptyFrame:
+; ARM-DISABLE:       @ %bb.0: @ %entry
+; ARM-DISABLE-NEXT:    mov r0, #0
+; ARM-DISABLE-NEXT:    bx lr
+;
+; THUMB-ENABLE-LABEL: emptyFrame:
+; THUMB-ENABLE:       @ %bb.0: @ %entry
+; THUMB-ENABLE-NEXT:    movs r0, #0
+; THUMB-ENABLE-NEXT:    bx lr
+;
+; THUMB-DISABLE-LABEL: emptyFrame:
+; THUMB-DISABLE:       @ %bb.0: @ %entry
+; THUMB-DISABLE-NEXT:    movs r0, #0
+; THUMB-DISABLE-NEXT:    bx lr
 entry:
   ret i32 0
 }
 
 ; Check that we handle inline asm correctly.
-; CHECK-LABEL: inlineAsm:
+; inlineAsm:
 ;
-; ARM-ENABLE: cmp r0, #0
-; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+; cmp r0, #0
+; beq [[ELSE_LABEL:LBB[0-9_]+]]
+; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
 ;
 ; Prologue code.
 ; Make sure we save the CSR used in the inline asm: r4.
-; CHECK: push {r4, r7, lr}
-; CHECK-NEXT: add r7, sp, #4
+; push {r4, r7, lr}
+; add r7, sp, #4
 ;
-; ARM-DISABLE: cmp r0, #0
-; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+; cmp r0, #0
+; beq [[ELSE_LABEL:LBB[0-9_]+]]
+; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
 ;
-; CHECK: mov{{s?}} [[IV:r[0-9]+]], #10
+; mov{{s?}} [[IV:r[0-9]+]], #10
 ;
 ; Next BB.
-; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
-; ARM: subs [[IV]], [[IV]], #1
-; THUMB: subs [[IV]], #1
-; CHECK: add{{(\.w)?}} r4, r4, #1
-; CHECK: bne [[LOOP]]
+; [[LOOP:LBB[0-9_]+]]: @ %for.body
+; subs [[IV]], [[IV]], #1
+; subs [[IV]], #1
+; add{{(\.w)?}} r4, r4, #1
+; bne [[LOOP]]
 ;
 ; Next BB.
-; CHECK: mov{{s?}} r0, #0
+; mov{{s?}} r0, #0
 ;
 ; Duplicated epilogue.
-; DISABLE: pop {r4, r7, pc}
+; pop {r4, r7, pc}
 ;
-; CHECK: [[ELSE_LABEL]]: @ %if.else
+; [[ELSE_LABEL]]: @ %if.else
 ; Shift second argument by one and store into returned register.
-; CHECK: lsl{{s?}} r0, r1, #1
-; DISABLE-NEXT: pop {r4, r7, pc}
+; lsl{{s?}} r0, r1, #1
+; pop {r4, r7, pc}
 ;
-; ENABLE-NEXT: bx lr
+; bx lr
 define i32 @inlineAsm(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" {
+; ARM-ENABLE-LABEL: inlineAsm:
+; ARM-ENABLE:       @ %bb.0: @ %entry
+; ARM-ENABLE-NEXT:    cmp r0, #0
+; ARM-ENABLE-NEXT:    beq LBB6_4
+; ARM-ENABLE-NEXT:  @ %bb.1: @ %for.preheader
+; ARM-ENABLE-NEXT:    push {r4, r7, lr}
+; ARM-ENABLE-NEXT:    add r7, sp, #4
+; ARM-ENABLE-NEXT:    mov r0, #10
+; ARM-ENABLE-NEXT:    @ InlineAsm Start
+; ARM-ENABLE-NEXT:    nop
+; ARM-ENABLE-NEXT:    @ InlineAsm End
+; ARM-ENABLE-NEXT:  LBB6_2: @ %for.body
+; ARM-ENABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-ENABLE-NEXT:    subs r0, r0, #1
+; ARM-ENABLE-NEXT:    @ InlineAsm Start
+; ARM-ENABLE-NEXT:    add r4, r4, #1
+; ARM-ENABLE-NEXT:    @ InlineAsm End
+; ARM-ENABLE-NEXT:    bne LBB6_2
+; ARM-ENABLE-NEXT:  @ %bb.3: @ %for.exit
+; ARM-ENABLE-NEXT:    mov r0, #0
+; ARM-ENABLE-NEXT:    @ InlineAsm Start
+; ARM-ENABLE-NEXT:    nop
+; ARM-ENABLE-NEXT:    @ InlineAsm End
+; ARM-ENABLE-NEXT:    pop {r4, r7, pc}
+; ARM-ENABLE-NEXT:  LBB6_4: @ %if.else
+; ARM-ENABLE-NEXT:    lsl r0, r1, #1
+; ARM-ENABLE-NEXT:    bx lr
+;
+; ARM-DISABLE-LABEL: inlineAsm:
+; ARM-DISABLE:       @ %bb.0: @ %entry
+; ARM-DISABLE-NEXT:    push {r4, r7, lr}
+; ARM-DISABLE-NEXT:    add r7, sp, #4
+; ARM-DISABLE-NEXT:    cmp r0, #0
+; ARM-DISABLE-NEXT:    beq LBB6_4
+; ARM-DISABLE-NEXT:  @ %bb.1: @ %for.preheader
+; ARM-DISABLE-NEXT:    mov r0, #10
+; ARM-DISABLE-NEXT:    @ InlineAsm Start
+; ARM-DISABLE-NEXT:    nop
+; ARM-DISABLE-NEXT:    @ InlineAsm End
+; ARM-DISABLE-NEXT:  LBB6_2: @ %for.body
+; ARM-DISABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-DISABLE-NEXT:    subs r0, r0, #1
+; ARM-DISABLE-NEXT:    @ InlineAsm Start
+; ARM-DISABLE-NEXT:    add r4, r4, #1
+; ARM-DISABLE-NEXT:    @ InlineAsm End
+; ARM-DISABLE-NEXT:    bne LBB6_2
+; ARM-DISABLE-NEXT:  @ %bb.3: @ %for.exit
+; ARM-DISABLE-NEXT:    mov r0, #0
+; ARM-DISABLE-NEXT:    @ InlineAsm Start
+; ARM-DISABLE-NEXT:    nop
+; ARM-DISABLE-NEXT:    @ InlineAsm End
+; ARM-DISABLE-NEXT:    pop {r4, r7, pc}
+; ARM-DISABLE-NEXT:  LBB6_4: @ %if.else
+; ARM-DISABLE-NEXT:    lsl r0, r1, #1
+; ARM-DISABLE-NEXT:    pop {r4, r7, pc}
+;
+; THUMB-ENABLE-LABEL: inlineAsm:
+; THUMB-ENABLE:       @ %bb.0: @ %entry
+; THUMB-ENABLE-NEXT:    cbz r0, LBB6_4
+; THUMB-ENABLE-NEXT:  @ %bb.1: @ %for.preheader
+; THUMB-ENABLE-NEXT:    push {r4, r7, lr}
+; THUMB-ENABLE-NEXT:    add r7, sp, #4
+; THUMB-ENABLE-NEXT:    movs r0, #10
+; THUMB-ENABLE-NEXT:    @ InlineAsm Start
+; THUMB-ENABLE-NEXT:    nop
+; THUMB-ENABLE-NEXT:    @ InlineAsm End
+; THUMB-ENABLE-NEXT:  LBB6_2: @ %for.body
+; THUMB-ENABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-ENABLE-NEXT:    subs r0, #1
+; THUMB-ENABLE-NEXT:    @ InlineAsm Start
+; THUMB-ENABLE-NEXT:    add.w r4, r4, #1
+; THUMB-ENABLE-NEXT:    @ InlineAsm End
+; THUMB-ENABLE-NEXT:    bne LBB6_2
+; THUMB-ENABLE-NEXT:  @ %bb.3: @ %for.exit
+; THUMB-ENABLE-NEXT:    movs r0, #0
+; THUMB-ENABLE-NEXT:    @ InlineAsm Start
+; THUMB-ENABLE-NEXT:    nop
+; THUMB-ENABLE-NEXT:    @ InlineAsm End
+; THUMB-ENABLE-NEXT:    pop {r4, r7, pc}
+; THUMB-ENABLE-NEXT:  LBB6_4: @ %if.else
+; THUMB-ENABLE-NEXT:    lsls r0, r1, #1
+; THUMB-ENABLE-NEXT:    bx lr
+;
+; THUMB-DISABLE-LABEL: inlineAsm:
+; THUMB-DISABLE:       @ %bb.0: @ %entry
+; THUMB-DISABLE-NEXT:    push {r4, r7, lr}
+; THUMB-DISABLE-NEXT:    add r7, sp, #4
+; THUMB-DISABLE-NEXT:    cbz r0, LBB6_4
+; THUMB-DISABLE-NEXT:  @ %bb.1: @ %for.preheader
+; THUMB-DISABLE-NEXT:    movs r0, #10
+; THUMB-DISABLE-NEXT:    @ InlineAsm Start
+; THUMB-DISABLE-NEXT:    nop
+; THUMB-DISABLE-NEXT:    @ InlineAsm End
+; THUMB-DISABLE-NEXT:  LBB6_2: @ %for.body
+; THUMB-DISABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-DISABLE-NEXT:    subs r0, #1
+; THUMB-DISABLE-NEXT:    @ InlineAsm Start
+; THUMB-DISABLE-NEXT:    add.w r4, r4, #1
+; THUMB-DISABLE-NEXT:    @ InlineAsm End
+; THUMB-DISABLE-NEXT:    bne LBB6_2
+; THUMB-DISABLE-NEXT:  @ %bb.3: @ %for.exit
+; THUMB-DISABLE-NEXT:    movs r0, #0
+; THUMB-DISABLE-NEXT:    @ InlineAsm Start
+; THUMB-DISABLE-NEXT:    nop
+; THUMB-DISABLE-NEXT:    @ InlineAsm End
+; THUMB-DISABLE-NEXT:    pop {r4, r7, pc}
+; THUMB-DISABLE-NEXT:  LBB6_4: @ %if.else
+; THUMB-DISABLE-NEXT:    lsls r0, r1, #1
+; THUMB-DISABLE-NEXT:    pop {r4, r7, pc}
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %for.preheader
@@ -435,46 +1099,131 @@ if.end:
 }
 
 ; Check that we handle calls to variadic functions correctly.
-; CHECK-LABEL: callVariadicFunc:
+; callVariadicFunc:
 ;
-; ARM-ENABLE: cmp r0, #0
-; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+; cmp r0, #0
+; beq [[ELSE_LABEL:LBB[0-9_]+]]
+; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
 ;
 ; Prologue code.
-; CHECK: push {r7, lr}
-; CHECK-NEXT: mov r7, sp
-; CHECK-NEXT: sub sp, {{(sp, )?}}#12
-;
-; ARM-DISABLE: cmp r0, #0
-; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-; THUMB-DISABLE-NEXT: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+; push {r7, lr}
+; mov r7, sp
+; sub sp, {{(sp, )?}}#12
+;
+; cmp r0, #0
+; beq [[ELSE_LABEL:LBB[0-9_]+]]
+; cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
 ;
 ; Setup of the varargs.
-; CHECK: mov r0, r1
-; CHECK-NEXT: mov r2, r1
-; CHECK-NEXT: mov r3, r1
-; ARM-NEXT: str r1, [sp]
-; ARM-NEXT: str r1, [sp, #4]
-; THUMB-NEXT: strd r1, r1, [sp]
-; CHECK-NEXT: str r1, [sp, #8]
-; CHECK-NEXT: bl{{x?}} _someVariadicFunc
-; CHECK-NEXT: lsl{{s?}} r0, r0, #3
-; ARM-NEXT: mov sp, r7
-; THUMB-NEXT: add sp, #12
-; CHECK-NEXT: pop {r7, pc}
+; mov r0, r1
+; mov r2, r1
+; mov r3, r1
+; str r1, [sp]
+; str r1, [sp, #4]
+; strd r1, r1, [sp]
+; str r1, [sp, #8]
+; bl{{x?}} _someVariadicFunc
+; lsl{{s?}} r0, r0, #3
+; mov sp, r7
+; add sp, #12
+; pop {r7, pc}
 ;
-; CHECK: [[ELSE_LABEL]]: @ %if.else
+; [[ELSE_LABEL]]: @ %if.else
 ; Shift second argument by one and store into returned register.
-; CHECK: lsl{{s?}} r0, r1, #1
+; lsl{{s?}} r0, r1, #1
 ;
 ; Epilogue code.
-; ENABLE-NEXT: bx lr
+; bx lr
 ;
-; ARM-DISABLE-NEXT: mov sp, r7
-; THUMB-DISABLE-NEXT: add sp, #12
-; DISABLE-NEXT: pop {r7, pc}
+; mov sp, r7
+; add sp, #12
+; pop {r7, pc}
 define i32 @callVariadicFunc(i32 %cond, i32 %N) "no-frame-pointer-elim"="true" {
+; ARM-ENABLE-LABEL: callVariadicFunc:
+; ARM-ENABLE:       @ %bb.0: @ %entry
+; ARM-ENABLE-NEXT:    cmp r0, #0
+; ARM-ENABLE-NEXT:    beq LBB7_2
+; ARM-ENABLE-NEXT:  @ %bb.1: @ %if.then
+; ARM-ENABLE-NEXT:    push {r7, lr}
+; ARM-ENABLE-NEXT:    mov r7, sp
+; ARM-ENABLE-NEXT:    sub sp, sp, #12
+; ARM-ENABLE-NEXT:    mov r0, r1
+; ARM-ENABLE-NEXT:    mov r2, r1
+; ARM-ENABLE-NEXT:    mov r3, r1
+; ARM-ENABLE-NEXT:    str r1, [sp]
+; ARM-ENABLE-NEXT:    str r1, [sp, #4]
+; ARM-ENABLE-NEXT:    str r1, [sp, #8]
+; ARM-ENABLE-NEXT:    bl _someVariadicFunc
+; ARM-ENABLE-NEXT:    lsl r0, r0, #3
+; ARM-ENABLE-NEXT:    mov sp, r7
+; ARM-ENABLE-NEXT:    pop {r7, pc}
+; ARM-ENABLE-NEXT:  LBB7_2: @ %if.else
+; ARM-ENABLE-NEXT:    lsl r0, r1, #1
+; ARM-ENABLE-NEXT:    bx lr
+;
+; ARM-DISABLE-LABEL: callVariadicFunc:
+; ARM-DISABLE:       @ %bb.0: @ %entry
+; ARM-DISABLE-NEXT:    push {r7, lr}
+; ARM-DISABLE-NEXT:    mov r7, sp
+; ARM-DISABLE-NEXT:    sub sp, sp, #12
+; ARM-DISABLE-NEXT:    cmp r0, #0
+; ARM-DISABLE-NEXT:    beq LBB7_2
+; ARM-DISABLE-NEXT:  @ %bb.1: @ %if.then
+; ARM-DISABLE-NEXT:    mov r0, r1
+; ARM-DISABLE-NEXT:    mov r2, r1
+; ARM-DISABLE-NEXT:    mov r3, r1
+; ARM-DISABLE-NEXT:    str r1, [sp]
+; ARM-DISABLE-NEXT:    str r1, [sp, #4]
+; ARM-DISABLE-NEXT:    str r1, [sp, #8]
+; ARM-DISABLE-NEXT:    bl _someVariadicFunc
+; ARM-DISABLE-NEXT:    lsl r0, r0, #3
+; ARM-DISABLE-NEXT:    mov sp, r7
+; ARM-DISABLE-NEXT:    pop {r7, pc}
+; ARM-DISABLE-NEXT:  LBB7_2: @ %if.else
+; ARM-DISABLE-NEXT:    lsl r0, r1, #1
+; ARM-DISABLE-NEXT:    mov sp, r7
+; ARM-DISABLE-NEXT:    pop {r7, pc}
+;
+; THUMB-ENABLE-LABEL: callVariadicFunc:
+; THUMB-ENABLE:       @ %bb.0: @ %entry
+; THUMB-ENABLE-NEXT:    cbz r0, LBB7_2
+; THUMB-ENABLE-NEXT:  @ %bb.1: @ %if.then
+; THUMB-ENABLE-NEXT:    push {r7, lr}
+; THUMB-ENABLE-NEXT:    mov r7, sp
+; THUMB-ENABLE-NEXT:    sub sp, #12
+; THUMB-ENABLE-NEXT:    mov r0, r1
+; THUMB-ENABLE-NEXT:    mov r2, r1
+; THUMB-ENABLE-NEXT:    mov r3, r1
+; THUMB-ENABLE-NEXT:    strd r1, r1, [sp]
+; THUMB-ENABLE-NEXT:    str r1, [sp, #8]
+; THUMB-ENABLE-NEXT:    bl _someVariadicFunc
+; THUMB-ENABLE-NEXT:    lsls r0, r0, #3
+; THUMB-ENABLE-NEXT:    add sp, #12
+; THUMB-ENABLE-NEXT:    pop {r7, pc}
+; THUMB-ENABLE-NEXT:  LBB7_2: @ %if.else
+; THUMB-ENABLE-NEXT:    lsls r0, r1, #1
+; THUMB-ENABLE-NEXT:    bx lr
+;
+; THUMB-DISABLE-LABEL: callVariadicFunc:
+; THUMB-DISABLE:       @ %bb.0: @ %entry
+; THUMB-DISABLE-NEXT:    push {r7, lr}
+; THUMB-DISABLE-NEXT:    mov r7, sp
+; THUMB-DISABLE-NEXT:    sub sp, #12
+; THUMB-DISABLE-NEXT:    cbz r0, LBB7_2
+; THUMB-DISABLE-NEXT:  @ %bb.1: @ %if.then
+; THUMB-DISABLE-NEXT:    mov r0, r1
+; THUMB-DISABLE-NEXT:    mov r2, r1
+; THUMB-DISABLE-NEXT:    mov r3, r1
+; THUMB-DISABLE-NEXT:    strd r1, r1, [sp]
+; THUMB-DISABLE-NEXT:    str r1, [sp, #8]
+; THUMB-DISABLE-NEXT:    bl _someVariadicFunc
+; THUMB-DISABLE-NEXT:    lsls r0, r0, #3
+; THUMB-DISABLE-NEXT:    add sp, #12
+; THUMB-DISABLE-NEXT:    pop {r7, pc}
+; THUMB-DISABLE-NEXT:  LBB7_2: @ %if.else
+; THUMB-DISABLE-NEXT:    lsls r0, r1, #1
+; THUMB-DISABLE-NEXT:    add sp, #12
+; THUMB-DISABLE-NEXT:    pop {r7, pc}
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %if.then
@@ -499,29 +1248,86 @@ declare i32 @someVariadicFunc(i32, ...)
 ; Although this is not incorrect to insert such code, it is useless
 ; and it hurts the binary size.
 ;
-; CHECK-LABEL: noreturn:
-; DISABLE: push
-; ARM-ENABLE: cmp r0, #0
-; ARM-DISABLE: cmp r0, #0
-; ARM-ENABLE: bne [[ABORT:LBB[0-9_]+]]
-; ARM-DISABLE: bne [[ABORT:LBB[0-9_]+]]
-; THUMB-ENABLE: cbnz r0,  [[ABORT:LBB[0-9_]+]]
-; THUMB-DISABLE: cbnz r0,  [[ABORT:LBB[0-9_]+]]
+; noreturn:
+; push
+; cmp r0, #0
+; cmp r0, #0
+; bne [[ABORT:LBB[0-9_]+]]
+; bne [[ABORT:LBB[0-9_]+]]
+; cbnz r0,  [[ABORT:LBB[0-9_]+]]
+; cbnz r0,  [[ABORT:LBB[0-9_]+]]
 
 ;
-; CHECK: mov{{s?}} r0, #42
+; mov{{s?}} r0, #42
 ;
-; ENABLE-NEXT: bx lr
+; bx lr
 ;
-; DISABLE-NEXT: pop
+; pop
 ;;
-; CHECK: [[ABORT]]: @ %if.abort
+; [[ABORT]]: @ %if.abort
 ;
-; ENABLE: push
+; push
 ;
-; CHECK: bl{{x?}} _abort
-; ENABLE-NOT: pop
+; bl{{x?}} _abort
+; pop
 define i32 @noreturn(i8 signext %bad_thing) "no-frame-pointer-elim"="true" {
+; ARM-ENABLE-LABEL: noreturn:
+; ARM-ENABLE:       @ %bb.0: @ %entry
+; ARM-ENABLE-NEXT:    cmp r0, #0
+; ARM-ENABLE-NEXT:    bne LBB8_2
+; ARM-ENABLE-NEXT:  @ %bb.1: @ %if.end
+; ARM-ENABLE-NEXT:    mov r0, #42
+; ARM-ENABLE-NEXT:    bx lr
+; ARM-ENABLE-NEXT:  LBB8_2: @ %if.abort
+; ARM-ENABLE-NEXT:    push {r4, r7, lr}
+; ARM-ENABLE-NEXT:    add r7, sp, #4
+; ARM-ENABLE-NEXT:    @ InlineAsm Start
+; ARM-ENABLE-NEXT:    mov r0, #1
+; ARM-ENABLE-NEXT:    @ InlineAsm End
+; ARM-ENABLE-NEXT:    bl _abort
+;
+; ARM-DISABLE-LABEL: noreturn:
+; ARM-DISABLE:       @ %bb.0: @ %entry
+; ARM-DISABLE-NEXT:    push {r4, r7, lr}
+; ARM-DISABLE-NEXT:    add r7, sp, #4
+; ARM-DISABLE-NEXT:    cmp r0, #0
+; ARM-DISABLE-NEXT:    bne LBB8_2
+; ARM-DISABLE-NEXT:  @ %bb.1: @ %if.end
+; ARM-DISABLE-NEXT:    mov r0, #42
+; ARM-DISABLE-NEXT:    pop {r4, r7, pc}
+; ARM-DISABLE-NEXT:  LBB8_2: @ %if.abort
+; ARM-DISABLE-NEXT:    @ InlineAsm Start
+; ARM-DISABLE-NEXT:    mov r0, #1
+; ARM-DISABLE-NEXT:    @ InlineAsm End
+; ARM-DISABLE-NEXT:    bl _abort
+;
+; THUMB-ENABLE-LABEL: noreturn:
+; THUMB-ENABLE:       @ %bb.0: @ %entry
+; THUMB-ENABLE-NEXT:    cbnz r0, LBB8_2
+; THUMB-ENABLE-NEXT:  @ %bb.1: @ %if.end
+; THUMB-ENABLE-NEXT:    movs r0, #42
+; THUMB-ENABLE-NEXT:    bx lr
+; THUMB-ENABLE-NEXT:  LBB8_2: @ %if.abort
+; THUMB-ENABLE-NEXT:    push {r4, r7, lr}
+; THUMB-ENABLE-NEXT:    add r7, sp, #4
+; THUMB-ENABLE-NEXT:    @ InlineAsm Start
+; THUMB-ENABLE-NEXT:    mov.w r0, #1
+; THUMB-ENABLE-NEXT:    @ InlineAsm End
+; THUMB-ENABLE-NEXT:    bl _abort
+;
+; THUMB-DISABLE-LABEL: noreturn:
+; THUMB-DISABLE:       @ %bb.0: @ %entry
+; THUMB-DISABLE-NEXT:    push {r4, r7, lr}
+; THUMB-DISABLE-NEXT:    add r7, sp, #4
+; THUMB-DISABLE-NEXT:    cbnz r0, LBB8_2
+; THUMB-DISABLE-NEXT:  @ %bb.1: @ %if.end
+; THUMB-DISABLE-NEXT:    movs r0, #42
+; THUMB-DISABLE-NEXT:    pop {r4, r7, pc}
+; THUMB-DISABLE-NEXT:  LBB8_2: @ %if.abort
+; THUMB-DISABLE-NEXT:    @ InlineAsm Start
+; THUMB-DISABLE-NEXT:    mov.w r0, #1
+; THUMB-DISABLE-NEXT:    @ InlineAsm End
+; THUMB-DISABLE-NEXT:    bl _abort
 entry:
   %tobool = icmp eq i8 %bad_thing, 0
   br i1 %tobool, label %if.end, label %if.abort
@@ -546,9 +1352,142 @@ attributes #0 = { noreturn nounwind }
 ; dominator is itself. In this case, we cannot perform shrink wrapping, but we
 ; should return gracefully and continue compilation.
 ; The only condition for this test is the compilation finishes correctly.
-; CHECK-LABEL: infiniteloop
-; CHECK: pop
+; infiniteloop
+; pop
 define void @infiniteloop() "no-frame-pointer-elim"="true" {
+; ARM-LABEL: infiniteloop:
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    push {r4, r5, r7, lr}
+; ARM-NEXT:    add r7, sp, #8
+; ARM-NEXT:    mov r0, #0
+; ARM-NEXT:    cmp r0, #0
+; ARM-NEXT:    bne LBB9_3
+; ARM-NEXT:  @ %bb.1: @ %if.then
+; ARM-NEXT:    sub r1, sp, #16
+; ARM-NEXT:    mov sp, r1
+; ARM-NEXT:  LBB9_2: @ %for.body
+; ARM-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-NEXT:    @ InlineAsm Start
+; ARM-NEXT:    mov r2, #1
+; ARM-NEXT:    @ InlineAsm End
+; ARM-NEXT:    add r0, r2, r0
+; ARM-NEXT:    str r0, [r1]
+; ARM-NEXT:    b LBB9_2
+; ARM-NEXT:  LBB9_3: @ %if.end
+; ARM-NEXT:    sub sp, r7, #8
+; ARM-NEXT:    pop {r4, r5, r7, pc}
+;
+; THUMB-LABEL: infiniteloop:
+; THUMB:       @ %bb.0: @ %entry
+; THUMB-NEXT:    push {r4, r5, r7, lr}
+; THUMB-NEXT:    add r7, sp, #8
+; THUMB-NEXT:    movs r0, #0
+; THUMB-NEXT:    cbnz r0, LBB9_3
+; THUMB-NEXT:  @ %bb.1: @ %if.then
+; THUMB-NEXT:    sub.w r0, sp, #16
+; THUMB-NEXT:    mov sp, r0
+; THUMB-NEXT:    movs r1, #0
+; THUMB-NEXT:  LBB9_2: @ %for.body
+; THUMB-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-NEXT:    @ InlineAsm Start
+; THUMB-NEXT:    mov.w r2, #1
+; THUMB-NEXT:    @ InlineAsm End
+; THUMB-NEXT:    add r1, r2
+; THUMB-NEXT:    str r1, [r0]
+; THUMB-NEXT:    b LBB9_2
+; THUMB-NEXT:  LBB9_3: @ %if.end
+; THUMB-NEXT:    sub.w r4, r7, #8
+; THUMB-NEXT:    mov sp, r4
+; THUMB-NEXT:    pop {r4, r5, r7, pc}
+; ARM-ENABLE-LABEL: infiniteloop:
+; ARM-ENABLE:       @ %bb.0: @ %entry
+; ARM-ENABLE-NEXT:    push {r4, r5, r7, lr}
+; ARM-ENABLE-NEXT:    add r7, sp, #8
+; ARM-ENABLE-NEXT:    mov r0, #0
+; ARM-ENABLE-NEXT:    cmp r0, #0
+; ARM-ENABLE-NEXT:    bne LBB9_3
+; ARM-ENABLE-NEXT:  @ %bb.1: @ %if.then
+; ARM-ENABLE-NEXT:    sub r1, sp, #16
+; ARM-ENABLE-NEXT:    mov sp, r1
+; ARM-ENABLE-NEXT:  LBB9_2: @ %for.body
+; ARM-ENABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-ENABLE-NEXT:    @ InlineAsm Start
+; ARM-ENABLE-NEXT:    mov r2, #1
+; ARM-ENABLE-NEXT:    @ InlineAsm End
+; ARM-ENABLE-NEXT:    add r0, r2, r0
+; ARM-ENABLE-NEXT:    str r0, [r1]
+; ARM-ENABLE-NEXT:    b LBB9_2
+; ARM-ENABLE-NEXT:  LBB9_3: @ %if.end
+; ARM-ENABLE-NEXT:    sub sp, r7, #8
+; ARM-ENABLE-NEXT:    pop {r4, r5, r7, pc}
+;
+; ARM-DISABLE-LABEL: infiniteloop:
+; ARM-DISABLE:       @ %bb.0: @ %entry
+; ARM-DISABLE-NEXT:    push {r4, r5, r7, lr}
+; ARM-DISABLE-NEXT:    add r7, sp, #8
+; ARM-DISABLE-NEXT:    mov r0, #0
+; ARM-DISABLE-NEXT:    cmp r0, #0
+; ARM-DISABLE-NEXT:    bne LBB9_3
+; ARM-DISABLE-NEXT:  @ %bb.1: @ %if.then
+; ARM-DISABLE-NEXT:    sub r1, sp, #16
+; ARM-DISABLE-NEXT:    mov sp, r1
+; ARM-DISABLE-NEXT:  LBB9_2: @ %for.body
+; ARM-DISABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-DISABLE-NEXT:    @ InlineAsm Start
+; ARM-DISABLE-NEXT:    mov r2, #1
+; ARM-DISABLE-NEXT:    @ InlineAsm End
+; ARM-DISABLE-NEXT:    add r0, r2, r0
+; ARM-DISABLE-NEXT:    str r0, [r1]
+; ARM-DISABLE-NEXT:    b LBB9_2
+; ARM-DISABLE-NEXT:  LBB9_3: @ %if.end
+; ARM-DISABLE-NEXT:    sub sp, r7, #8
+; ARM-DISABLE-NEXT:    pop {r4, r5, r7, pc}
+;
+; THUMB-ENABLE-LABEL: infiniteloop:
+; THUMB-ENABLE:       @ %bb.0: @ %entry
+; THUMB-ENABLE-NEXT:    push {r4, r5, r7, lr}
+; THUMB-ENABLE-NEXT:    add r7, sp, #8
+; THUMB-ENABLE-NEXT:    movs r0, #0
+; THUMB-ENABLE-NEXT:    cbnz r0, LBB9_3
+; THUMB-ENABLE-NEXT:  @ %bb.1: @ %if.then
+; THUMB-ENABLE-NEXT:    sub.w r0, sp, #16
+; THUMB-ENABLE-NEXT:    mov sp, r0
+; THUMB-ENABLE-NEXT:    movs r1, #0
+; THUMB-ENABLE-NEXT:  LBB9_2: @ %for.body
+; THUMB-ENABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-ENABLE-NEXT:    @ InlineAsm Start
+; THUMB-ENABLE-NEXT:    mov.w r2, #1
+; THUMB-ENABLE-NEXT:    @ InlineAsm End
+; THUMB-ENABLE-NEXT:    add r1, r2
+; THUMB-ENABLE-NEXT:    str r1, [r0]
+; THUMB-ENABLE-NEXT:    b LBB9_2
+; THUMB-ENABLE-NEXT:  LBB9_3: @ %if.end
+; THUMB-ENABLE-NEXT:    sub.w r4, r7, #8
+; THUMB-ENABLE-NEXT:    mov sp, r4
+; THUMB-ENABLE-NEXT:    pop {r4, r5, r7, pc}
+;
+; THUMB-DISABLE-LABEL: infiniteloop:
+; THUMB-DISABLE:       @ %bb.0: @ %entry
+; THUMB-DISABLE-NEXT:    push {r4, r5, r7, lr}
+; THUMB-DISABLE-NEXT:    add r7, sp, #8
+; THUMB-DISABLE-NEXT:    movs r0, #0
+; THUMB-DISABLE-NEXT:    cbnz r0, LBB9_3
+; THUMB-DISABLE-NEXT:  @ %bb.1: @ %if.then
+; THUMB-DISABLE-NEXT:    sub.w r0, sp, #16
+; THUMB-DISABLE-NEXT:    mov sp, r0
+; THUMB-DISABLE-NEXT:    movs r1, #0
+; THUMB-DISABLE-NEXT:  LBB9_2: @ %for.body
+; THUMB-DISABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-DISABLE-NEXT:    @ InlineAsm Start
+; THUMB-DISABLE-NEXT:    mov.w r2, #1
+; THUMB-DISABLE-NEXT:    @ InlineAsm End
+; THUMB-DISABLE-NEXT:    add r1, r2
+; THUMB-DISABLE-NEXT:    str r1, [r0]
+; THUMB-DISABLE-NEXT:    b LBB9_2
+; THUMB-DISABLE-NEXT:  LBB9_3: @ %if.end
+; THUMB-DISABLE-NEXT:    sub.w r4, r7, #8
+; THUMB-DISABLE-NEXT:    mov sp, r4
+; THUMB-DISABLE-NEXT:    pop {r4, r5, r7, pc}
 entry:
   br i1 undef, label %if.then, label %if.end
 
@@ -568,8 +1507,8 @@ if.end:
 }
 
 ; Another infinite loop test this time with a body bigger than just one block.
-; CHECK-LABEL: infiniteloop2
-; CHECK: pop
+; infiniteloop2
+; pop
 define void @infiniteloop2() "no-frame-pointer-elim"="true" {
 entry:
   br i1 undef, label %if.then, label %if.end
@@ -598,9 +1537,169 @@ if.end:
 }
 
 ; Another infinite loop test this time with two nested infinite loop.
-; CHECK-LABEL: infiniteloop3
-; CHECK: bx lr
+; infiniteloop3
+; bx lr
 define void @infiniteloop3() "no-frame-pointer-elim"="true" {
+; ARM-LABEL: infiniteloop3:
+; ARM:       @ %bb.0: @ %entry
+; ARM-NEXT:    mov r0, #0
+; ARM-NEXT:    cmp r0, #0
+; ARM-NEXT:    bne LBB11_5
+; ARM-NEXT:  @ %bb.1: @ %loop2a.preheader
+; ARM-NEXT:    mov r1, #0
+; ARM-NEXT:    mov r2, r0
+; ARM-NEXT:    b LBB11_3
+; ARM-NEXT:  LBB11_2: @ %loop2b
+; ARM-NEXT:    @ in Loop: Header=BB11_3 Depth=1
+; ARM-NEXT:    str r1, [r2]
+; ARM-NEXT:    mov r2, r1
+; ARM-NEXT:    mov r1, r3
+; ARM-NEXT:  LBB11_3: @ %loop1
+; ARM-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-NEXT:    ldr r3, [r0]
+; ARM-NEXT:    cmp r0, #0
+; ARM-NEXT:    bne LBB11_2
+; ARM-NEXT:  @ %bb.4: @ in Loop: Header=BB11_3 Depth=1
+; ARM-NEXT:    mov r0, r1
+; ARM-NEXT:    mov r1, r3
+; ARM-NEXT:    mov r2, r0
+; ARM-NEXT:    b LBB11_3
+; ARM-NEXT:  LBB11_5: @ %end
+; ARM-NEXT:    bx lr
+;
+; THUMB-LABEL: infiniteloop3:
+; THUMB:       @ %bb.0: @ %entry
+; THUMB-NEXT:    movs r0, #0
+; THUMB-NEXT:    cbnz r0, LBB11_5
+; THUMB-NEXT:  @ %bb.1: @ %loop2a.preheader
+; THUMB-NEXT:    movs r0, #0
+; THUMB-NEXT:    movs r1, #0
+; THUMB-NEXT:    mov r2, r0
+; THUMB-NEXT:    b LBB11_3
+; THUMB-NEXT:  LBB11_2: @ %loop2b
+; THUMB-NEXT:    @ in Loop: Header=BB11_3 Depth=1
+; THUMB-NEXT:    str r1, [r2]
+; THUMB-NEXT:    mov r2, r1
+; THUMB-NEXT:    mov r1, r3
+; THUMB-NEXT:  LBB11_3: @ %loop1
+; THUMB-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-NEXT:    ldr r3, [r0]
+; THUMB-NEXT:    cmp r0, #0
+; THUMB-NEXT:    bne LBB11_2
+; THUMB-NEXT:  @ %bb.4: @ in Loop: Header=BB11_3 Depth=1
+; THUMB-NEXT:    mov r0, r1
+; THUMB-NEXT:    mov r1, r3
+; THUMB-NEXT:    mov r2, r0
+; THUMB-NEXT:    b LBB11_3
+; THUMB-NEXT:  LBB11_5: @ %end
+; THUMB-NEXT:    bx lr
+; ARM-ENABLE-LABEL: infiniteloop3:
+; ARM-ENABLE:       @ %bb.0: @ %entry
+; ARM-ENABLE-NEXT:    mov r0, #0
+; ARM-ENABLE-NEXT:    cmp r0, #0
+; ARM-ENABLE-NEXT:    bne LBB11_5
+; ARM-ENABLE-NEXT:  @ %bb.1: @ %loop2a.preheader
+; ARM-ENABLE-NEXT:    mov r1, #0
+; ARM-ENABLE-NEXT:    mov r2, r0
+; ARM-ENABLE-NEXT:    b LBB11_3
+; ARM-ENABLE-NEXT:  LBB11_2: @ %loop2b
+; ARM-ENABLE-NEXT:    @ in Loop: Header=BB11_3 Depth=1
+; ARM-ENABLE-NEXT:    str r1, [r2]
+; ARM-ENABLE-NEXT:    mov r2, r1
+; ARM-ENABLE-NEXT:    mov r1, r3
+; ARM-ENABLE-NEXT:  LBB11_3: @ %loop1
+; ARM-ENABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-ENABLE-NEXT:    ldr r3, [r0]
+; ARM-ENABLE-NEXT:    cmp r0, #0
+; ARM-ENABLE-NEXT:    bne LBB11_2
+; ARM-ENABLE-NEXT:  @ %bb.4: @ in Loop: Header=BB11_3 Depth=1
+; ARM-ENABLE-NEXT:    mov r0, r1
+; ARM-ENABLE-NEXT:    mov r1, r3
+; ARM-ENABLE-NEXT:    mov r2, r0
+; ARM-ENABLE-NEXT:    b LBB11_3
+; ARM-ENABLE-NEXT:  LBB11_5: @ %end
+; ARM-ENABLE-NEXT:    bx lr
+;
+; ARM-DISABLE-LABEL: infiniteloop3:
+; ARM-DISABLE:       @ %bb.0: @ %entry
+; ARM-DISABLE-NEXT:    mov r0, #0
+; ARM-DISABLE-NEXT:    cmp r0, #0
+; ARM-DISABLE-NEXT:    bne LBB11_5
+; ARM-DISABLE-NEXT:  @ %bb.1: @ %loop2a.preheader
+; ARM-DISABLE-NEXT:    mov r1, #0
+; ARM-DISABLE-NEXT:    mov r2, r0
+; ARM-DISABLE-NEXT:    b LBB11_3
+; ARM-DISABLE-NEXT:  LBB11_2: @ %loop2b
+; ARM-DISABLE-NEXT:    @ in Loop: Header=BB11_3 Depth=1
+; ARM-DISABLE-NEXT:    str r1, [r2]
+; ARM-DISABLE-NEXT:    mov r2, r1
+; ARM-DISABLE-NEXT:    mov r1, r3
+; ARM-DISABLE-NEXT:  LBB11_3: @ %loop1
+; ARM-DISABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ARM-DISABLE-NEXT:    ldr r3, [r0]
+; ARM-DISABLE-NEXT:    cmp r0, #0
+; ARM-DISABLE-NEXT:    bne LBB11_2
+; ARM-DISABLE-NEXT:  @ %bb.4: @ in Loop: Header=BB11_3 Depth=1
+; ARM-DISABLE-NEXT:    mov r0, r1
+; ARM-DISABLE-NEXT:    mov r1, r3
+; ARM-DISABLE-NEXT:    mov r2, r0
+; ARM-DISABLE-NEXT:    b LBB11_3
+; ARM-DISABLE-NEXT:  LBB11_5: @ %end
+; ARM-DISABLE-NEXT:    bx lr
+;
+; THUMB-ENABLE-LABEL: infiniteloop3:
+; THUMB-ENABLE:       @ %bb.0: @ %entry
+; THUMB-ENABLE-NEXT:    movs r0, #0
+; THUMB-ENABLE-NEXT:    cbnz r0, LBB11_5
+; THUMB-ENABLE-NEXT:  @ %bb.1: @ %loop2a.preheader
+; THUMB-ENABLE-NEXT:    movs r0, #0
+; THUMB-ENABLE-NEXT:    movs r1, #0
+; THUMB-ENABLE-NEXT:    mov r2, r0
+; THUMB-ENABLE-NEXT:    b LBB11_3
+; THUMB-ENABLE-NEXT:  LBB11_2: @ %loop2b
+; THUMB-ENABLE-NEXT:    @ in Loop: Header=BB11_3 Depth=1
+; THUMB-ENABLE-NEXT:    str r1, [r2]
+; THUMB-ENABLE-NEXT:    mov r2, r1
+; THUMB-ENABLE-NEXT:    mov r1, r3
+; THUMB-ENABLE-NEXT:  LBB11_3: @ %loop1
+; THUMB-ENABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-ENABLE-NEXT:    ldr r3, [r0]
+; THUMB-ENABLE-NEXT:    cmp r0, #0
+; THUMB-ENABLE-NEXT:    bne LBB11_2
+; THUMB-ENABLE-NEXT:  @ %bb.4: @ in Loop: Header=BB11_3 Depth=1
+; THUMB-ENABLE-NEXT:    mov r0, r1
+; THUMB-ENABLE-NEXT:    mov r1, r3
+; THUMB-ENABLE-NEXT:    mov r2, r0
+; THUMB-ENABLE-NEXT:    b LBB11_3
+; THUMB-ENABLE-NEXT:  LBB11_5: @ %end
+; THUMB-ENABLE-NEXT:    bx lr
+;
+; THUMB-DISABLE-LABEL: infiniteloop3:
+; THUMB-DISABLE:       @ %bb.0: @ %entry
+; THUMB-DISABLE-NEXT:    movs r0, #0
+; THUMB-DISABLE-NEXT:    cbnz r0, LBB11_5
+; THUMB-DISABLE-NEXT:  @ %bb.1: @ %loop2a.preheader
+; THUMB-DISABLE-NEXT:    movs r0, #0
+; THUMB-DISABLE-NEXT:    movs r1, #0
+; THUMB-DISABLE-NEXT:    mov r2, r0
+; THUMB-DISABLE-NEXT:    b LBB11_3
+; THUMB-DISABLE-NEXT:  LBB11_2: @ %loop2b
+; THUMB-DISABLE-NEXT:    @ in Loop: Header=BB11_3 Depth=1
+; THUMB-DISABLE-NEXT:    str r1, [r2]
+; THUMB-DISABLE-NEXT:    mov r2, r1
+; THUMB-DISABLE-NEXT:    mov r1, r3
+; THUMB-DISABLE-NEXT:  LBB11_3: @ %loop1
+; THUMB-DISABLE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMB-DISABLE-NEXT:    ldr r3, [r0]
+; THUMB-DISABLE-NEXT:    cmp r0, #0
+; THUMB-DISABLE-NEXT:    bne LBB11_2
+; THUMB-DISABLE-NEXT:  @ %bb.4: @ in Loop: Header=BB11_3 Depth=1
+; THUMB-DISABLE-NEXT:    mov r0, r1
+; THUMB-DISABLE-NEXT:    mov r1, r3
+; THUMB-DISABLE-NEXT:    mov r2, r0
+; THUMB-DISABLE-NEXT:    b LBB11_3
+; THUMB-DISABLE-NEXT:  LBB11_5: @ %end
+; THUMB-DISABLE-NEXT:    bx lr
 entry:
   br i1 undef, label %loop2a, label %body
 
@@ -636,31 +1735,127 @@ declare double @llvm.pow.f64(double, dou
 ; exercise the path where we were dereferencing the end iterator
 ; to access debug info location while inserting the spill code
 ; during PEI with shrink-wrapping enable.
-; CHECK-LABEL: debug_info:
+; debug_info:
 ;
-; ENABLE: {{tst  r2, #1|lsls r1, r2, #31}}
-; ENABLE-NEXT: beq      [[BB13:LBB[0-9_]+]]
+; {{tst  r2, #1|lsls r1, r2, #31}}
+; beq      [[BB13:LBB[0-9_]+]]
 ;
-; CHECK: push
+; push
 ;
-; DISABLE: {{tst  r2, #1|lsls r1, r2, #31}}
-; DISABLE: beq      [[BB13:LBB[0-9_]+]]
+; {{tst  r2, #1|lsls r1, r2, #31}}
+; beq      [[BB13:LBB[0-9_]+]]
 ;
-; CHECK: bl{{x?}} _pow
+; bl{{x?}} _pow
 ;
 ;
-; ENABLE: pop
+; pop
 ;
-; CHECK: [[BB13]]:
-; CHECK: vldr
+; [[BB13]]:
+; vldr
 ;
-; DISABLE: pop
+; pop
 ;
 ; FIXME: This is flakey passing by finding 'bl' somewhere amongst the debug
 ; info (like labels named 'line_table) not because it's found a bl instruction.
 ;
-; CHECK: bl
+; bl
 define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %tmp) "no-frame-pointer-elim"="true" {
+; ARM-LABEL: debug_info:
+; ARM:       @ %bb.0: @ %bb
+; ARM-NEXT:    push {r4, r7, lr}
+; ARM-NEXT:    add r7, sp, #4
+; ARM-NEXT:    sub r4, sp, #16
+; ARM-NEXT:    bfc r4, #0, #4
+; ARM-NEXT:    mov sp, r4
+; ARM-NEXT:    tst r2, #1
+; ARM-NEXT:    vst1.64 {d8, d9}, [r4:128]
+; ARM-NEXT:    beq LBB12_2
+; ARM-NEXT:  @ %bb.1: @ %bb3
+; ARM-NEXT:    ldr r1, [r7, #8]
+; ARM-NEXT:    vmov s16, r0
+; ARM-NEXT:    mov r0, r3
+; ARM-NEXT:    mov r2, r3
+; ARM-NEXT:    vmov d9, r3, r1
+; ARM-NEXT:    mov r3, r1
+; ARM-NEXT:    bl _pow
+; ARM-NEXT:    vmov.f32 s0, #1.000000e+00
+; ARM-NEXT:    vmov.f64 d16, #1.000000e+00
+; ARM-NEXT:    vadd.f64 d16, d9, d16
+; ARM-NEXT:    vcmpe.f32 s16, s0
+; ARM-NEXT:    vmrs APSR_nzcv, fpscr
+; ARM-NEXT:    vmov d17, r0, r1
+; ARM-NEXT:    vmov.f64 d18, d9
+; ARM-NEXT:    vadd.f64 d17, d17, d17
+; ARM-NEXT:    vmovgt.f64 d18, d16
+; ARM-NEXT:    vcmp.f64 d18, d9
+; ARM-NEXT:    vmrs APSR_nzcv, fpscr
+; ARM-NEXT:    vmovne.f64 d9, d17
+; ARM-NEXT:    vcvt.f32.f64 s0, d9
+; ARM-NEXT:    b LBB12_3
+; ARM-NEXT:  LBB12_2:
+; ARM-NEXT:    vldr s0, LCPI12_0
+; ARM-NEXT:  LBB12_3: @ %bb13
+; ARM-NEXT:    mov r4, sp
+; ARM-NEXT:    vld1.64 {d8, d9}, [r4:128]
+; ARM-NEXT:    vmov r0, s0
+; ARM-NEXT:    sub sp, r7, #4
+; ARM-NEXT:    pop {r4, r7, pc}
+; ARM-NEXT:    .p2align 2
+; ARM-NEXT:  @ %bb.4:
+; ARM-NEXT:    .data_region
+; ARM-NEXT:  LCPI12_0:
+; ARM-NEXT:    .long 0 @ float 0
+; ARM-NEXT:    .end_data_region
+;
+; THUMB-LABEL: debug_info:
+; THUMB:       @ %bb.0: @ %bb
+; THUMB-NEXT:    push {r4, r7, lr}
+; THUMB-NEXT:    add r7, sp, #4
+; THUMB-NEXT:    sub.w r4, sp, #16
+; THUMB-NEXT:    bfc r4, #0, #4
+; THUMB-NEXT:    mov sp, r4
+; THUMB-NEXT:    lsls r1, r2, #31
+; THUMB-NEXT:    vst1.64 {d8, d9}, [r4:128]
+; THUMB-NEXT:    beq LBB12_2
+; THUMB-NEXT:  @ %bb.1: @ %bb3
+; THUMB-NEXT:    ldr r1, [r7, #8]
+; THUMB-NEXT:    vmov s16, r0
+; THUMB-NEXT:    mov r0, r3
+; THUMB-NEXT:    mov r2, r3
+; THUMB-NEXT:    vmov d9, r3, r1
+; THUMB-NEXT:    mov r3, r1
+; THUMB-NEXT:    bl _pow
+; THUMB-NEXT:    vmov.f32 s0, #1.000000e+00
+; THUMB-NEXT:    vmov.f64 d16, #1.000000e+00
+; THUMB-NEXT:    vmov.f64 d18, d9
+; THUMB-NEXT:    vcmpe.f32 s16, s0
+; THUMB-NEXT:    vadd.f64 d16, d9, d16
+; THUMB-NEXT:    vmrs APSR_nzcv, fpscr
+; THUMB-NEXT:    it gt
+; THUMB-NEXT:    vmovgt.f64 d18, d16
+; THUMB-NEXT:    vcmp.f64 d18, d9
+; THUMB-NEXT:    vmov d17, r0, r1
+; THUMB-NEXT:    vmrs APSR_nzcv, fpscr
+; THUMB-NEXT:    vadd.f64 d17, d17, d17
+; THUMB-NEXT:    it ne
+; THUMB-NEXT:    vmovne.f64 d9, d17
+; THUMB-NEXT:    vcvt.f32.f64 s0, d9
+; THUMB-NEXT:    b LBB12_3
+; THUMB-NEXT:  LBB12_2:
+; THUMB-NEXT:    vldr s0, LCPI12_0
+; THUMB-NEXT:  LBB12_3: @ %bb13
+; THUMB-NEXT:    mov r4, sp
+; THUMB-NEXT:    vld1.64 {d8, d9}, [r4:128]
+; THUMB-NEXT:    subs r4, r7, #4
+; THUMB-NEXT:    vmov r0, s0
+; THUMB-NEXT:    mov sp, r4
+; THUMB-NEXT:    pop {r4, r7, pc}
+; THUMB-NEXT:    .p2align 2
+; THUMB-NEXT:  @ %bb.4:
+; THUMB-NEXT:    .data_region
+; THUMB-NEXT:  LCPI12_0:
+; THUMB-NEXT:    .long 0 @ float 0
+; THUMB-NEXT:    .end_data_region
 bb:
   br i1 %or.cond, label %bb3, label %bb13
 

Modified: llvm/trunk/test/CodeGen/PowerPC/BreakableToken-reduced.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/BreakableToken-reduced.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/BreakableToken-reduced.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/BreakableToken-reduced.ll Thu Jun 13 06:56:19 2019
@@ -203,8 +203,8 @@ target triple = "powerpc64le-unknown-lin
 ; Load a value into R0 before saving the LR
 ; CHECK: lwz 0, {{[0-9]+([0-9]+)}}
 
-; Ensure the LR is saved using a different register
-; CHECK: mflr {{[1-9]+}}
+; Ensure the LR is saved using a different register - edit: D63152 prevents stack pop before loads and stores
+; CHECK-NOT: mflr {{[1-9]+}}
 
 ; Ensure the LR is restored using a different register
 ; CHECK: mtlr {{[0-9]+}}

Modified: llvm/trunk/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll Thu Jun 13 06:56:19 2019
@@ -10,7 +10,8 @@ entry:
   br i1 undef, label %land.lhs.true, label %if.end
 
 ; CHECK: # %land.lhs.true
-; CHECK-NEXT: bclr
+; Test updated due to D63152 where any load/store prevents shrink-wrapping
+; CHECK-NEXT: bc
 ; CHECK-NEXT: # %if.end4
 land.lhs.true:                                    ; preds = %entry
   br i1 undef, label %return, label %if.end4

Modified: llvm/trunk/test/CodeGen/PowerPC/licm-tocReg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/licm-tocReg.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/licm-tocReg.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/licm-tocReg.ll Thu Jun 13 06:56:19 2019
@@ -61,11 +61,11 @@
 
 @ga = external global i32, align 4
 @gb = external global i32, align 4
-
 define signext i32 @test(i32 (i32)* nocapture %FP) local_unnamed_addr #0 {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis 4, 2, .LC0@toc@ha
+; CHECK-NEXT:    mflr 0
+; CHECK:         addis 4, 2, .LC0@toc@ha
 ; CHECK-NEXT:    addis 5, 2, .LC1@toc@ha
 ; CHECK-NEXT:    mr 12, 3
 ; CHECK-NEXT:    ld 4, .LC0@toc@l(4)

Modified: llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll Thu Jun 13 06:56:19 2019
@@ -1,8 +1,8 @@
 ; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=false | FileCheck %s -check-prefix=CHECK-SCO-ONLY
-; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-SHRK
+; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-ONLY
 ; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=false | FileCheck %s -check-prefix=CHECK-SCO-ONLY
-; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-SHRK
-
+; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-ONLY
+; Edit: D63152 prevents stack popping before loads and stores, so shrink-wrap does nothing here
 %"class.clang::NamedDecl" = type { i32 }
 declare void @__assert_fail();
 

Modified: llvm/trunk/test/CodeGen/PowerPC/xray-ret-is-terminator.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/xray-ret-is-terminator.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/xray-ret-is-terminator.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/xray-ret-is-terminator.ll Thu Jun 13 06:56:19 2019
@@ -2,9 +2,9 @@
 
 define void @ILLBeBack() #0 {
 ; CHECK-LABEL @ILLBeBack
-; CHECK: beq {{[0-9]+}}, [[LABEL:\.[a-zA-Z0-9_]+]]
-; CHECK: bl __xray_FunctionExit
+; CHECK: bne {{[0-9]+}}, [[LABEL:\.[a-zA-Z0-9_]+]]
 ; CHECK: [[LABEL]]:
+; CHECK: bl __xray_FunctionExit
 bb:
   br i1 undef, label %bb1, label %bb8
 

Modified: llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll Thu Jun 13 06:56:19 2019
@@ -1,11 +1,8 @@
-; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho \
-; RUN:      | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V4T
-; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho \
-; RUN:      | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V5T
-; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho \
-; RUN:      | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V4T
-; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho \
-; RUN:      | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V5T
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho | FileCheck %s --check-prefix=ENABLE-V4T
+; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho | FileCheck %s --check-prefix=ENABLE-V5T
+; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumb-macho | FileCheck %s --check-prefix=DISABLE-V4T
+; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -tail-dup-placement=0 -mtriple=thumbv5-macho | FileCheck %s --check-prefix=DISABLE-V5T
 
 ;
 ; Note: Lots of tests use inline asm instead of regular calls.
@@ -20,49 +17,108 @@
 ; results due to branches not being analyzable under v5
 
 ; Initial motivating example: Simple diamond with a call just on one side.
-; CHECK-LABEL: foo:
-;
-; Compare the arguments and jump to exit.
-; No prologue needed.
-; ENABLE: cmp r0, r1
-; ENABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; CHECK: push {r7, lr}
-; CHECK: sub sp, #8
-;
-; Compare the arguments and jump to exit.
-; After the prologue is set.
-; DISABLE: cmp r0, r1
-; DISABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]]
-;
-; Store %a in the alloca.
-; CHECK: str r0, [sp, #4]
-; Set the alloca address in the second argument.
-; Set the first argument to zero.
-; CHECK: movs r0, #0
-; CHECK-NEXT: add r1, sp, #4
-; CHECK-NEXT: bl
-;
-; With shrink-wrapping, epilogue is just after the call.
-; ENABLE-NEXT: add sp, #8
-; ENABLE-V5T-NEXT: pop {r7, pc}
-; ENABLE-V4T-NEXT: pop {r7}
-; ENABLE-V4T-NEXT: pop {r1}
-; ENABLE-V4T-NEXT: mov lr, r1
-;
-; CHECK: [[EXIT_LABEL]]:
-;
-; Without shrink-wrapping, epilogue is in the exit block.
-; Epilogue code. (What we pop does not matter.)
-; DISABLE: add sp, #8
-; DISABLE-V5T-NEXT: pop {r7, pc}
-; DISABLE-V4T-NEXT: pop {r7}
-; DISABLE-V4T-NEXT: pop {r1}
-; DISABLE-V4T-NEXT: bx r1
-;
-; ENABLE-NEXT: bx lr
 define i32 @foo(i32 %a, i32 %b) {
+; ENABLE-V4T-LABEL: foo:
+; ENABLE-V4T:       @ %bb.0:
+; ENABLE-V4T-NEXT:    cmp r0, r1
+; ENABLE-V4T-NEXT:    bge LBB0_2
+; ENABLE-V4T-NEXT:  @ %bb.1: @ %true
+; ENABLE-V4T-NEXT:    push {r7, lr}
+; ENABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V4T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V4T-NEXT:    .cfi_offset r7, -8
+; ENABLE-V4T-NEXT:    sub sp, #8
+; ENABLE-V4T-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-V4T-NEXT:    str r0, [sp, #4]
+; ENABLE-V4T-NEXT:    ldr r0, LCPI0_0
+; ENABLE-V4T-NEXT:  LPC0_0:
+; ENABLE-V4T-NEXT:    add r0, pc
+; ENABLE-V4T-NEXT:    ldr r2, [r0]
+; ENABLE-V4T-NEXT:    movs r0, #0
+; ENABLE-V4T-NEXT:    add r1, sp, #4
+; ENABLE-V4T-NEXT:    bl Ltmp0
+; ENABLE-V4T-NEXT:    add sp, #8
+; ENABLE-V4T-NEXT:    pop {r7}
+; ENABLE-V4T-NEXT:    pop {r1}
+; ENABLE-V4T-NEXT:    mov lr, r1
+; ENABLE-V4T-NEXT:  LBB0_2: @ %false
+; ENABLE-V4T-NEXT:    bx lr
+; ENABLE-V4T-NEXT:    .p2align 2
+; ENABLE-V4T-NEXT:  @ %bb.3:
+; ENABLE-V4T-NEXT:    .data_region
+; ENABLE-V4T-NEXT:  LCPI0_0:
+; ENABLE-V4T-NEXT:    .long L_doSomething$non_lazy_ptr-(LPC0_0+4)
+; ENABLE-V4T-NEXT:    .end_data_region
+;
+; ENABLE-V5T-LABEL: foo:
+; ENABLE-V5T:       @ %bb.0:
+; ENABLE-V5T-NEXT:    cmp r0, r1
+; ENABLE-V5T-NEXT:    bge LBB0_2
+; ENABLE-V5T-NEXT:  @ %bb.1: @ %true
+; ENABLE-V5T-NEXT:    push {r7, lr}
+; ENABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V5T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V5T-NEXT:    .cfi_offset r7, -8
+; ENABLE-V5T-NEXT:    sub sp, #8
+; ENABLE-V5T-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-V5T-NEXT:    str r0, [sp, #4]
+; ENABLE-V5T-NEXT:    movs r0, #0
+; ENABLE-V5T-NEXT:    add r1, sp, #4
+; ENABLE-V5T-NEXT:    bl _doSomething
+; ENABLE-V5T-NEXT:    add sp, #8
+; ENABLE-V5T-NEXT:    pop {r7, pc}
+; ENABLE-V5T-NEXT:  LBB0_2: @ %false
+; ENABLE-V5T-NEXT:    bx lr
+;
+; DISABLE-V4T-LABEL: foo:
+; DISABLE-V4T:       @ %bb.0:
+; DISABLE-V4T-NEXT:    push {r7, lr}
+; DISABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V4T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V4T-NEXT:    .cfi_offset r7, -8
+; DISABLE-V4T-NEXT:    sub sp, #8
+; DISABLE-V4T-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-V4T-NEXT:    cmp r0, r1
+; DISABLE-V4T-NEXT:    bge LBB0_2
+; DISABLE-V4T-NEXT:  @ %bb.1: @ %true
+; DISABLE-V4T-NEXT:    str r0, [sp, #4]
+; DISABLE-V4T-NEXT:    ldr r0, LCPI0_0
+; DISABLE-V4T-NEXT:  LPC0_0:
+; DISABLE-V4T-NEXT:    add r0, pc
+; DISABLE-V4T-NEXT:    ldr r2, [r0]
+; DISABLE-V4T-NEXT:    movs r0, #0
+; DISABLE-V4T-NEXT:    add r1, sp, #4
+; DISABLE-V4T-NEXT:    bl Ltmp0
+; DISABLE-V4T-NEXT:  LBB0_2: @ %false
+; DISABLE-V4T-NEXT:    add sp, #8
+; DISABLE-V4T-NEXT:    pop {r7}
+; DISABLE-V4T-NEXT:    pop {r1}
+; DISABLE-V4T-NEXT:    bx r1
+; DISABLE-V4T-NEXT:    .p2align 2
+; DISABLE-V4T-NEXT:  @ %bb.3:
+; DISABLE-V4T-NEXT:    .data_region
+; DISABLE-V4T-NEXT:  LCPI0_0:
+; DISABLE-V4T-NEXT:    .long L_doSomething$non_lazy_ptr-(LPC0_0+4)
+; DISABLE-V4T-NEXT:    .end_data_region
+;
+; DISABLE-V5T-LABEL: foo:
+; DISABLE-V5T:       @ %bb.0:
+; DISABLE-V5T-NEXT:    push {r7, lr}
+; DISABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V5T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V5T-NEXT:    .cfi_offset r7, -8
+; DISABLE-V5T-NEXT:    sub sp, #8
+; DISABLE-V5T-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-V5T-NEXT:    cmp r0, r1
+; DISABLE-V5T-NEXT:    bge LBB0_2
+; DISABLE-V5T-NEXT:  @ %bb.1: @ %true
+; DISABLE-V5T-NEXT:    str r0, [sp, #4]
+; DISABLE-V5T-NEXT:    movs r0, #0
+; DISABLE-V5T-NEXT:    add r1, sp, #4
+; DISABLE-V5T-NEXT:    bl _doSomething
+; DISABLE-V5T-NEXT:  LBB0_2: @ %false
+; DISABLE-V5T-NEXT:    add sp, #8
+; DISABLE-V5T-NEXT:    pop {r7, pc}
   %tmp = alloca i32, align 4
   %tmp2 = icmp slt i32 %a, %b
   br i1 %tmp2, label %true, label %false
@@ -79,27 +135,114 @@ false:
 
 
 ; Same, but the final BB is non-trivial, so we don't duplicate the return inst.
-; CHECK-LABEL: bar:
-;
-; With shrink-wrapping, epilogue is just after the call.
-; CHECK: bl
-; ENABLE-NEXT: add sp, #8
-; ENABLE-NEXT: pop {r7}
-; ENABLE-NEXT: pop {r0}
-; ENABLE-NEXT: mov lr, r0
-;
-; CHECK: movs r0, #42
-;
-; Without shrink-wrapping, epilogue is in the exit block.
-; Epilogue code. (What we pop does not matter.)
-; DISABLE: add sp, #8
-; DISABLE-V5T-NEXT: pop {r7, pc}
-; DISABLE-V4T-NEXT: pop {r7}
-; DISABLE-V4T-NEXT: pop {r1}
-; DISABLE-V4T-NEXT: bx r1
-;
-; ENABLE-NEXT: bx lr
 define i32 @bar(i32 %a, i32 %b) {
+; ENABLE-V4T-LABEL: bar:
+; ENABLE-V4T:       @ %bb.0:
+; ENABLE-V4T-NEXT:    cmp r0, r1
+; ENABLE-V4T-NEXT:    bge LBB1_2
+; ENABLE-V4T-NEXT:  @ %bb.1: @ %true
+; ENABLE-V4T-NEXT:    push {r7, lr}
+; ENABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V4T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V4T-NEXT:    .cfi_offset r7, -8
+; ENABLE-V4T-NEXT:    sub sp, #8
+; ENABLE-V4T-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-V4T-NEXT:    str r0, [sp, #4]
+; ENABLE-V4T-NEXT:    ldr r0, LCPI1_0
+; ENABLE-V4T-NEXT:  LPC1_0:
+; ENABLE-V4T-NEXT:    add r0, pc
+; ENABLE-V4T-NEXT:    ldr r2, [r0]
+; ENABLE-V4T-NEXT:    movs r0, #0
+; ENABLE-V4T-NEXT:    add r1, sp, #4
+; ENABLE-V4T-NEXT:    bl Ltmp1
+; ENABLE-V4T-NEXT:    add sp, #8
+; ENABLE-V4T-NEXT:    pop {r7}
+; ENABLE-V4T-NEXT:    pop {r0}
+; ENABLE-V4T-NEXT:    mov lr, r0
+; ENABLE-V4T-NEXT:  LBB1_2: @ %false
+; ENABLE-V4T-NEXT:    movs r0, #42
+; ENABLE-V4T-NEXT:    bx lr
+; ENABLE-V4T-NEXT:    .p2align 2
+; ENABLE-V4T-NEXT:  @ %bb.3:
+; ENABLE-V4T-NEXT:    .data_region
+; ENABLE-V4T-NEXT:  LCPI1_0:
+; ENABLE-V4T-NEXT:    .long L_doSomething$non_lazy_ptr-(LPC1_0+4)
+; ENABLE-V4T-NEXT:    .end_data_region
+;
+; ENABLE-V5T-LABEL: bar:
+; ENABLE-V5T:       @ %bb.0:
+; ENABLE-V5T-NEXT:    cmp r0, r1
+; ENABLE-V5T-NEXT:    bge LBB1_2
+; ENABLE-V5T-NEXT:  @ %bb.1: @ %true
+; ENABLE-V5T-NEXT:    push {r7, lr}
+; ENABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V5T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V5T-NEXT:    .cfi_offset r7, -8
+; ENABLE-V5T-NEXT:    sub sp, #8
+; ENABLE-V5T-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-V5T-NEXT:    str r0, [sp, #4]
+; ENABLE-V5T-NEXT:    movs r0, #0
+; ENABLE-V5T-NEXT:    add r1, sp, #4
+; ENABLE-V5T-NEXT:    bl _doSomething
+; ENABLE-V5T-NEXT:    add sp, #8
+; ENABLE-V5T-NEXT:    pop {r7}
+; ENABLE-V5T-NEXT:    pop {r0}
+; ENABLE-V5T-NEXT:    mov lr, r0
+; ENABLE-V5T-NEXT:  LBB1_2: @ %false
+; ENABLE-V5T-NEXT:    movs r0, #42
+; ENABLE-V5T-NEXT:    bx lr
+;
+; DISABLE-V4T-LABEL: bar:
+; DISABLE-V4T:       @ %bb.0:
+; DISABLE-V4T-NEXT:    push {r7, lr}
+; DISABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V4T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V4T-NEXT:    .cfi_offset r7, -8
+; DISABLE-V4T-NEXT:    sub sp, #8
+; DISABLE-V4T-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-V4T-NEXT:    cmp r0, r1
+; DISABLE-V4T-NEXT:    bge LBB1_2
+; DISABLE-V4T-NEXT:  @ %bb.1: @ %true
+; DISABLE-V4T-NEXT:    str r0, [sp, #4]
+; DISABLE-V4T-NEXT:    ldr r0, LCPI1_0
+; DISABLE-V4T-NEXT:  LPC1_0:
+; DISABLE-V4T-NEXT:    add r0, pc
+; DISABLE-V4T-NEXT:    ldr r2, [r0]
+; DISABLE-V4T-NEXT:    movs r0, #0
+; DISABLE-V4T-NEXT:    add r1, sp, #4
+; DISABLE-V4T-NEXT:    bl Ltmp1
+; DISABLE-V4T-NEXT:  LBB1_2: @ %false
+; DISABLE-V4T-NEXT:    movs r0, #42
+; DISABLE-V4T-NEXT:    add sp, #8
+; DISABLE-V4T-NEXT:    pop {r7}
+; DISABLE-V4T-NEXT:    pop {r1}
+; DISABLE-V4T-NEXT:    bx r1
+; DISABLE-V4T-NEXT:    .p2align 2
+; DISABLE-V4T-NEXT:  @ %bb.3:
+; DISABLE-V4T-NEXT:    .data_region
+; DISABLE-V4T-NEXT:  LCPI1_0:
+; DISABLE-V4T-NEXT:    .long L_doSomething$non_lazy_ptr-(LPC1_0+4)
+; DISABLE-V4T-NEXT:    .end_data_region
+;
+; DISABLE-V5T-LABEL: bar:
+; DISABLE-V5T:       @ %bb.0:
+; DISABLE-V5T-NEXT:    push {r7, lr}
+; DISABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V5T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V5T-NEXT:    .cfi_offset r7, -8
+; DISABLE-V5T-NEXT:    sub sp, #8
+; DISABLE-V5T-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-V5T-NEXT:    cmp r0, r1
+; DISABLE-V5T-NEXT:    bge LBB1_2
+; DISABLE-V5T-NEXT:  @ %bb.1: @ %true
+; DISABLE-V5T-NEXT:    str r0, [sp, #4]
+; DISABLE-V5T-NEXT:    movs r0, #0
+; DISABLE-V5T-NEXT:    add r1, sp, #4
+; DISABLE-V5T-NEXT:    bl _doSomething
+; DISABLE-V5T-NEXT:  LBB1_2: @ %false
+; DISABLE-V5T-NEXT:    movs r0, #42
+; DISABLE-V5T-NEXT:    add sp, #8
+; DISABLE-V5T-NEXT:    pop {r7, pc}
   %tmp = alloca i32, align 4
   %tmp2 = icmp slt i32 %a, %b
   br i1 %tmp2, label %true, label %false
@@ -119,51 +262,128 @@ declare i32 @doSomething(i32, i32*)
 
 ; Check that we do not perform the restore inside the loop whereas the save
 ; is outside.
-; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
-;
-; Shrink-wrapping allows to skip the prologue in the else case.
-; ENABLE: cmp r0, #0
-; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; Make sure we save the CSR used in the inline asm: r4.
-; CHECK: push {r4, lr}
-;
-; DISABLE: cmp r0, #0
-; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; SUM is in r0 because it is coalesced with the second
-; argument on the else path.
-; CHECK: movs [[SUM:r0]], #0
-; CHECK-NEXT: movs [[IV:r[0-9]+]], #10
-;
-; Next BB.
-; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
-; CHECK: movs [[TMP:r[0-9]+]], #1
-; CHECK: adds [[SUM]], [[TMP]], [[SUM]]
-; CHECK-NEXT: subs [[IV]], [[IV]], #1
-; CHECK-NEXT: bne [[LOOP]]
-;
-; Next BB.
-; SUM << 3.
-; CHECK: lsls [[SUM]], [[SUM]], #3
-;
-; Duplicated epilogue.
-; DISABLE-V5T: pop {r4, pc}
-; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]]
-;
-; CHECK: [[ELSE_LABEL]]: @ %if.else
-; Shift second argument by one and store into returned register.
-; CHECK: lsls r0, r1, #1
-; DISABLE-V5T-NEXT: pop {r4, pc}
-; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
-; DISABLE-V4T-NEXT: pop {r4}
-; DISABLE-V4T-NEXT: pop {r1}
-; DISABLE-V4T-NEXT: bx r1
-;
-; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
-; ENABLE-NEXT: bx lr
 define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
+; ENABLE-V4T-LABEL: freqSaveAndRestoreOutsideLoop:
+; ENABLE-V4T:       @ %bb.0: @ %entry
+; ENABLE-V4T-NEXT:    cmp r0, #0
+; ENABLE-V4T-NEXT:    beq LBB2_4
+; ENABLE-V4T-NEXT:  @ %bb.1: @ %for.preheader
+; ENABLE-V4T-NEXT:    push {r4, lr}
+; ENABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V4T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V4T-NEXT:    .cfi_offset r4, -8
+; ENABLE-V4T-NEXT:    @ InlineAsm Start
+; ENABLE-V4T-NEXT:    mov r8, r8
+; ENABLE-V4T-NEXT:    @ InlineAsm End
+; ENABLE-V4T-NEXT:    movs r0, #0
+; ENABLE-V4T-NEXT:    movs r1, #10
+; ENABLE-V4T-NEXT:  LBB2_2: @ %for.body
+; ENABLE-V4T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ENABLE-V4T-NEXT:    @ InlineAsm Start
+; ENABLE-V4T-NEXT:    movs r2, #1
+; ENABLE-V4T-NEXT:    @ InlineAsm End
+; ENABLE-V4T-NEXT:    adds r0, r2, r0
+; ENABLE-V4T-NEXT:    subs r1, r1, #1
+; ENABLE-V4T-NEXT:    bne LBB2_2
+; ENABLE-V4T-NEXT:  @ %bb.3: @ %for.end
+; ENABLE-V4T-NEXT:    lsls r0, r0, #3
+; ENABLE-V4T-NEXT:    pop {r4}
+; ENABLE-V4T-NEXT:    pop {r1}
+; ENABLE-V4T-NEXT:    bx r1
+; ENABLE-V4T-NEXT:  LBB2_4: @ %if.else
+; ENABLE-V4T-NEXT:    lsls r0, r1, #1
+; ENABLE-V4T-NEXT:    bx lr
+;
+; ENABLE-V5T-LABEL: freqSaveAndRestoreOutsideLoop:
+; ENABLE-V5T:       @ %bb.0: @ %entry
+; ENABLE-V5T-NEXT:    cmp r0, #0
+; ENABLE-V5T-NEXT:    beq LBB2_4
+; ENABLE-V5T-NEXT:  @ %bb.1: @ %for.preheader
+; ENABLE-V5T-NEXT:    push {r4, lr}
+; ENABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V5T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V5T-NEXT:    .cfi_offset r4, -8
+; ENABLE-V5T-NEXT:    @ InlineAsm Start
+; ENABLE-V5T-NEXT:    mov r8, r8
+; ENABLE-V5T-NEXT:    @ InlineAsm End
+; ENABLE-V5T-NEXT:    movs r0, #0
+; ENABLE-V5T-NEXT:    movs r1, #10
+; ENABLE-V5T-NEXT:  LBB2_2: @ %for.body
+; ENABLE-V5T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ENABLE-V5T-NEXT:    @ InlineAsm Start
+; ENABLE-V5T-NEXT:    movs r2, #1
+; ENABLE-V5T-NEXT:    @ InlineAsm End
+; ENABLE-V5T-NEXT:    adds r0, r2, r0
+; ENABLE-V5T-NEXT:    subs r1, r1, #1
+; ENABLE-V5T-NEXT:    bne LBB2_2
+; ENABLE-V5T-NEXT:  @ %bb.3: @ %for.end
+; ENABLE-V5T-NEXT:    lsls r0, r0, #3
+; ENABLE-V5T-NEXT:    pop {r4, pc}
+; ENABLE-V5T-NEXT:  LBB2_4: @ %if.else
+; ENABLE-V5T-NEXT:    lsls r0, r1, #1
+; ENABLE-V5T-NEXT:  LBB2_5: @ %if.end
+; ENABLE-V5T-NEXT:    bx lr
+;
+; DISABLE-V4T-LABEL: freqSaveAndRestoreOutsideLoop:
+; DISABLE-V4T:       @ %bb.0: @ %entry
+; DISABLE-V4T-NEXT:    push {r4, lr}
+; DISABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V4T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V4T-NEXT:    .cfi_offset r4, -8
+; DISABLE-V4T-NEXT:    cmp r0, #0
+; DISABLE-V4T-NEXT:    beq LBB2_4
+; DISABLE-V4T-NEXT:  @ %bb.1: @ %for.preheader
+; DISABLE-V4T-NEXT:    @ InlineAsm Start
+; DISABLE-V4T-NEXT:    mov r8, r8
+; DISABLE-V4T-NEXT:    @ InlineAsm End
+; DISABLE-V4T-NEXT:    movs r0, #0
+; DISABLE-V4T-NEXT:    movs r1, #10
+; DISABLE-V4T-NEXT:  LBB2_2: @ %for.body
+; DISABLE-V4T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; DISABLE-V4T-NEXT:    @ InlineAsm Start
+; DISABLE-V4T-NEXT:    movs r2, #1
+; DISABLE-V4T-NEXT:    @ InlineAsm End
+; DISABLE-V4T-NEXT:    adds r0, r2, r0
+; DISABLE-V4T-NEXT:    subs r1, r1, #1
+; DISABLE-V4T-NEXT:    bne LBB2_2
+; DISABLE-V4T-NEXT:  @ %bb.3: @ %for.end
+; DISABLE-V4T-NEXT:    lsls r0, r0, #3
+; DISABLE-V4T-NEXT:    b LBB2_5
+; DISABLE-V4T-NEXT:  LBB2_4: @ %if.else
+; DISABLE-V4T-NEXT:    lsls r0, r1, #1
+; DISABLE-V4T-NEXT:  LBB2_5: @ %if.end
+; DISABLE-V4T-NEXT:    pop {r4}
+; DISABLE-V4T-NEXT:    pop {r1}
+; DISABLE-V4T-NEXT:    bx r1
+;
+; DISABLE-V5T-LABEL: freqSaveAndRestoreOutsideLoop:
+; DISABLE-V5T:       @ %bb.0: @ %entry
+; DISABLE-V5T-NEXT:    push {r4, lr}
+; DISABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V5T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V5T-NEXT:    .cfi_offset r4, -8
+; DISABLE-V5T-NEXT:    cmp r0, #0
+; DISABLE-V5T-NEXT:    beq LBB2_4
+; DISABLE-V5T-NEXT:  @ %bb.1: @ %for.preheader
+; DISABLE-V5T-NEXT:    @ InlineAsm Start
+; DISABLE-V5T-NEXT:    mov r8, r8
+; DISABLE-V5T-NEXT:    @ InlineAsm End
+; DISABLE-V5T-NEXT:    movs r0, #0
+; DISABLE-V5T-NEXT:    movs r1, #10
+; DISABLE-V5T-NEXT:  LBB2_2: @ %for.body
+; DISABLE-V5T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; DISABLE-V5T-NEXT:    @ InlineAsm Start
+; DISABLE-V5T-NEXT:    movs r2, #1
+; DISABLE-V5T-NEXT:    @ InlineAsm End
+; DISABLE-V5T-NEXT:    adds r0, r2, r0
+; DISABLE-V5T-NEXT:    subs r1, r1, #1
+; DISABLE-V5T-NEXT:    bne LBB2_2
+; DISABLE-V5T-NEXT:  @ %bb.3: @ %for.end
+; DISABLE-V5T-NEXT:    lsls r0, r0, #3
+; DISABLE-V5T-NEXT:    pop {r4, pc}
+; DISABLE-V5T-NEXT:  LBB2_4: @ %if.else
+; DISABLE-V5T-NEXT:    lsls r0, r1, #1
+; DISABLE-V5T-NEXT:    pop {r4, pc}
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %for.preheader
@@ -198,26 +418,112 @@ declare i32 @something(...)
 
 ; Check that we do not perform the shrink-wrapping inside the loop even
 ; though that would be legal. The cost model must prevent that.
-; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
-; Prologue code.
-; Make sure we save the CSR used in the inline asm: r4.
-; CHECK: push {r4
-; This is the nop.
-; CHECK: mov r8, r8
-; CHECK: movs [[SUM:r0]], #0
-; CHECK-NEXT: movs [[IV:r[0-9]+]], #10
-; Next BB.
-; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: @ %for.body
-; CHECK: movs [[TMP:r[0-9]+]], #1
-; CHECK: adds [[SUM]], [[TMP]], [[SUM]]
-; CHECK-NEXT: subs [[IV]], [[IV]], #1
-; CHECK-NEXT: bne [[LOOP_LABEL]]
-; Next BB.
-; CHECK: @ %for.exit
-; This is the nop.
-; CHECK: mov r8, r8
-; CHECK: pop {r4
 define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
+; ENABLE-V4T-LABEL: freqSaveAndRestoreOutsideLoop2:
+; ENABLE-V4T:       @ %bb.0: @ %entry
+; ENABLE-V4T-NEXT:    push {r4, lr}
+; ENABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V4T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V4T-NEXT:    .cfi_offset r4, -8
+; ENABLE-V4T-NEXT:    @ InlineAsm Start
+; ENABLE-V4T-NEXT:    mov r8, r8
+; ENABLE-V4T-NEXT:    @ InlineAsm End
+; ENABLE-V4T-NEXT:    movs r0, #0
+; ENABLE-V4T-NEXT:    movs r1, #10
+; ENABLE-V4T-NEXT:  LBB3_1: @ %for.body
+; ENABLE-V4T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ENABLE-V4T-NEXT:    @ InlineAsm Start
+; ENABLE-V4T-NEXT:    movs r2, #1
+; ENABLE-V4T-NEXT:    @ InlineAsm End
+; ENABLE-V4T-NEXT:    adds r0, r2, r0
+; ENABLE-V4T-NEXT:    subs r1, r1, #1
+; ENABLE-V4T-NEXT:    bne LBB3_1
+; ENABLE-V4T-NEXT:  @ %bb.2: @ %for.exit
+; ENABLE-V4T-NEXT:    @ InlineAsm Start
+; ENABLE-V4T-NEXT:    mov r8, r8
+; ENABLE-V4T-NEXT:    @ InlineAsm End
+; ENABLE-V4T-NEXT:    pop {r4}
+; ENABLE-V4T-NEXT:    pop {r1}
+; ENABLE-V4T-NEXT:    bx r1
+;
+; ENABLE-V5T-LABEL: freqSaveAndRestoreOutsideLoop2:
+; ENABLE-V5T:       @ %bb.0: @ %entry
+; ENABLE-V5T-NEXT:    push {r4, lr}
+; ENABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V5T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V5T-NEXT:    .cfi_offset r4, -8
+; ENABLE-V5T-NEXT:    @ InlineAsm Start
+; ENABLE-V5T-NEXT:    mov r8, r8
+; ENABLE-V5T-NEXT:    @ InlineAsm End
+; ENABLE-V5T-NEXT:    movs r0, #0
+; ENABLE-V5T-NEXT:    movs r1, #10
+; ENABLE-V5T-NEXT:  LBB3_1: @ %for.body
+; ENABLE-V5T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ENABLE-V5T-NEXT:    @ InlineAsm Start
+; ENABLE-V5T-NEXT:    movs r2, #1
+; ENABLE-V5T-NEXT:    @ InlineAsm End
+; ENABLE-V5T-NEXT:    adds r0, r2, r0
+; ENABLE-V5T-NEXT:    subs r1, r1, #1
+; ENABLE-V5T-NEXT:    bne LBB3_1
+; ENABLE-V5T-NEXT:  @ %bb.2: @ %for.exit
+; ENABLE-V5T-NEXT:    @ InlineAsm Start
+; ENABLE-V5T-NEXT:    mov r8, r8
+; ENABLE-V5T-NEXT:    @ InlineAsm End
+; ENABLE-V5T-NEXT:    pop {r4, pc}
+; ENABLE-V5T-NEXT:  LBB3_3: @ %for.end
+; ENABLE-V5T-NEXT:    bx lr
+;
+; DISABLE-V4T-LABEL: freqSaveAndRestoreOutsideLoop2:
+; DISABLE-V4T:       @ %bb.0: @ %entry
+; DISABLE-V4T-NEXT:    push {r4, lr}
+; DISABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V4T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V4T-NEXT:    .cfi_offset r4, -8
+; DISABLE-V4T-NEXT:    @ InlineAsm Start
+; DISABLE-V4T-NEXT:    mov r8, r8
+; DISABLE-V4T-NEXT:    @ InlineAsm End
+; DISABLE-V4T-NEXT:    movs r0, #0
+; DISABLE-V4T-NEXT:    movs r1, #10
+; DISABLE-V4T-NEXT:  LBB3_1: @ %for.body
+; DISABLE-V4T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; DISABLE-V4T-NEXT:    @ InlineAsm Start
+; DISABLE-V4T-NEXT:    movs r2, #1
+; DISABLE-V4T-NEXT:    @ InlineAsm End
+; DISABLE-V4T-NEXT:    adds r0, r2, r0
+; DISABLE-V4T-NEXT:    subs r1, r1, #1
+; DISABLE-V4T-NEXT:    bne LBB3_1
+; DISABLE-V4T-NEXT:  @ %bb.2: @ %for.exit
+; DISABLE-V4T-NEXT:    @ InlineAsm Start
+; DISABLE-V4T-NEXT:    mov r8, r8
+; DISABLE-V4T-NEXT:    @ InlineAsm End
+; DISABLE-V4T-NEXT:    pop {r4}
+; DISABLE-V4T-NEXT:    pop {r1}
+; DISABLE-V4T-NEXT:    bx r1
+;
+; DISABLE-V5T-LABEL: freqSaveAndRestoreOutsideLoop2:
+; DISABLE-V5T:       @ %bb.0: @ %entry
+; DISABLE-V5T-NEXT:    push {r4, lr}
+; DISABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V5T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V5T-NEXT:    .cfi_offset r4, -8
+; DISABLE-V5T-NEXT:    @ InlineAsm Start
+; DISABLE-V5T-NEXT:    mov r8, r8
+; DISABLE-V5T-NEXT:    @ InlineAsm End
+; DISABLE-V5T-NEXT:    movs r0, #0
+; DISABLE-V5T-NEXT:    movs r1, #10
+; DISABLE-V5T-NEXT:  LBB3_1: @ %for.body
+; DISABLE-V5T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; DISABLE-V5T-NEXT:    @ InlineAsm Start
+; DISABLE-V5T-NEXT:    movs r2, #1
+; DISABLE-V5T-NEXT:    @ InlineAsm End
+; DISABLE-V5T-NEXT:    adds r0, r2, r0
+; DISABLE-V5T-NEXT:    subs r1, r1, #1
+; DISABLE-V5T-NEXT:    bne LBB3_1
+; DISABLE-V5T-NEXT:  @ %bb.2: @ %for.exit
+; DISABLE-V5T-NEXT:    @ InlineAsm Start
+; DISABLE-V5T-NEXT:    mov r8, r8
+; DISABLE-V5T-NEXT:    @ InlineAsm End
+; DISABLE-V5T-NEXT:    pop {r4, pc}
 entry:
   br label %for.preheader
 
@@ -244,54 +550,140 @@ for.end:
 
 ; Check with a more complex case that we do not have save within the loop and
 ; restore outside.
-; CHECK-LABEL: loopInfoSaveOutsideLoop:
-;
-; ENABLE: cmp r0, #0
-; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; Make sure we save the CSR used in the inline asm: r4.
-; CHECK: push {r4, lr}
-;
-; DISABLE: cmp r0, #0
-; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; SUM is in r0 because it is coalesced with the second
-; argument on the else path.
-; CHECK: movs [[SUM:r0]], #0
-; CHECK-NEXT: movs [[IV:r[0-9]+]], #10
-;
-; Next BB.
-; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
-; CHECK: movs [[TMP:r[0-9]+]], #1
-; CHECK: adds [[SUM]], [[TMP]], [[SUM]]
-; CHECK-NEXT: subs [[IV]], [[IV]], #1
-; CHECK-NEXT: bne [[LOOP]]
-;
-; Next BB.
-; SUM << 3.
-; CHECK: lsls [[SUM]], [[SUM]], #3
-; ENABLE-V5T-NEXT: pop {r4, pc}
-; ENABLE-V4T-NEXT: pop {r4}
-; ENABLE-V4T-NEXT: pop {r1}
-; ENABLE-V4T-NEXT: bx r1
-;
-; Duplicated epilogue.
-; DISABLE-V5T: pop {r4, pc}
-; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]]
-;
-; CHECK: [[ELSE_LABEL]]: @ %if.else
-; Shift second argument by one and store into returned register.
-; CHECK: lsls r0, r1, #1
-; DISABLE-V5T-NEXT: pop {r4, pc}
-; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
-; DISABLE-V4T-NEXT: pop {r4}
-; DISABLE-V4T-NEXT: pop {r1}
-; DISABLE-V4T-NEXT: bx r1
-;
-; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
-; ENABLE-NEXT: bx lr
 define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
+; ENABLE-V4T-LABEL: loopInfoSaveOutsideLoop:
+; ENABLE-V4T:       @ %bb.0: @ %entry
+; ENABLE-V4T-NEXT:    cmp r0, #0
+; ENABLE-V4T-NEXT:    beq LBB4_4
+; ENABLE-V4T-NEXT:  @ %bb.1: @ %for.preheader
+; ENABLE-V4T-NEXT:    push {r4, lr}
+; ENABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V4T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V4T-NEXT:    .cfi_offset r4, -8
+; ENABLE-V4T-NEXT:    @ InlineAsm Start
+; ENABLE-V4T-NEXT:    mov r8, r8
+; ENABLE-V4T-NEXT:    @ InlineAsm End
+; ENABLE-V4T-NEXT:    movs r0, #0
+; ENABLE-V4T-NEXT:    movs r1, #10
+; ENABLE-V4T-NEXT:  LBB4_2: @ %for.body
+; ENABLE-V4T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ENABLE-V4T-NEXT:    @ InlineAsm Start
+; ENABLE-V4T-NEXT:    movs r2, #1
+; ENABLE-V4T-NEXT:    @ InlineAsm End
+; ENABLE-V4T-NEXT:    adds r0, r2, r0
+; ENABLE-V4T-NEXT:    subs r1, r1, #1
+; ENABLE-V4T-NEXT:    bne LBB4_2
+; ENABLE-V4T-NEXT:  @ %bb.3: @ %for.end
+; ENABLE-V4T-NEXT:    @ InlineAsm Start
+; ENABLE-V4T-NEXT:    mov r8, r8
+; ENABLE-V4T-NEXT:    @ InlineAsm End
+; ENABLE-V4T-NEXT:    lsls r0, r0, #3
+; ENABLE-V4T-NEXT:    pop {r4}
+; ENABLE-V4T-NEXT:    pop {r1}
+; ENABLE-V4T-NEXT:    bx r1
+; ENABLE-V4T-NEXT:  LBB4_4: @ %if.else
+; ENABLE-V4T-NEXT:    lsls r0, r1, #1
+; ENABLE-V4T-NEXT:    bx lr
+;
+; ENABLE-V5T-LABEL: loopInfoSaveOutsideLoop:
+; ENABLE-V5T:       @ %bb.0: @ %entry
+; ENABLE-V5T-NEXT:    cmp r0, #0
+; ENABLE-V5T-NEXT:    beq LBB4_4
+; ENABLE-V5T-NEXT:  @ %bb.1: @ %for.preheader
+; ENABLE-V5T-NEXT:    push {r4, lr}
+; ENABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V5T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V5T-NEXT:    .cfi_offset r4, -8
+; ENABLE-V5T-NEXT:    @ InlineAsm Start
+; ENABLE-V5T-NEXT:    mov r8, r8
+; ENABLE-V5T-NEXT:    @ InlineAsm End
+; ENABLE-V5T-NEXT:    movs r0, #0
+; ENABLE-V5T-NEXT:    movs r1, #10
+; ENABLE-V5T-NEXT:  LBB4_2: @ %for.body
+; ENABLE-V5T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ENABLE-V5T-NEXT:    @ InlineAsm Start
+; ENABLE-V5T-NEXT:    movs r2, #1
+; ENABLE-V5T-NEXT:    @ InlineAsm End
+; ENABLE-V5T-NEXT:    adds r0, r2, r0
+; ENABLE-V5T-NEXT:    subs r1, r1, #1
+; ENABLE-V5T-NEXT:    bne LBB4_2
+; ENABLE-V5T-NEXT:  @ %bb.3: @ %for.end
+; ENABLE-V5T-NEXT:    @ InlineAsm Start
+; ENABLE-V5T-NEXT:    mov r8, r8
+; ENABLE-V5T-NEXT:    @ InlineAsm End
+; ENABLE-V5T-NEXT:    lsls r0, r0, #3
+; ENABLE-V5T-NEXT:    pop {r4, pc}
+; ENABLE-V5T-NEXT:  LBB4_4: @ %if.else
+; ENABLE-V5T-NEXT:    lsls r0, r1, #1
+; ENABLE-V5T-NEXT:  LBB4_5: @ %if.end
+; ENABLE-V5T-NEXT:    bx lr
+;
+; DISABLE-V4T-LABEL: loopInfoSaveOutsideLoop:
+; DISABLE-V4T:       @ %bb.0: @ %entry
+; DISABLE-V4T-NEXT:    push {r4, lr}
+; DISABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V4T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V4T-NEXT:    .cfi_offset r4, -8
+; DISABLE-V4T-NEXT:    cmp r0, #0
+; DISABLE-V4T-NEXT:    beq LBB4_4
+; DISABLE-V4T-NEXT:  @ %bb.1: @ %for.preheader
+; DISABLE-V4T-NEXT:    @ InlineAsm Start
+; DISABLE-V4T-NEXT:    mov r8, r8
+; DISABLE-V4T-NEXT:    @ InlineAsm End
+; DISABLE-V4T-NEXT:    movs r0, #0
+; DISABLE-V4T-NEXT:    movs r1, #10
+; DISABLE-V4T-NEXT:  LBB4_2: @ %for.body
+; DISABLE-V4T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; DISABLE-V4T-NEXT:    @ InlineAsm Start
+; DISABLE-V4T-NEXT:    movs r2, #1
+; DISABLE-V4T-NEXT:    @ InlineAsm End
+; DISABLE-V4T-NEXT:    adds r0, r2, r0
+; DISABLE-V4T-NEXT:    subs r1, r1, #1
+; DISABLE-V4T-NEXT:    bne LBB4_2
+; DISABLE-V4T-NEXT:  @ %bb.3: @ %for.end
+; DISABLE-V4T-NEXT:    @ InlineAsm Start
+; DISABLE-V4T-NEXT:    mov r8, r8
+; DISABLE-V4T-NEXT:    @ InlineAsm End
+; DISABLE-V4T-NEXT:    lsls r0, r0, #3
+; DISABLE-V4T-NEXT:    b LBB4_5
+; DISABLE-V4T-NEXT:  LBB4_4: @ %if.else
+; DISABLE-V4T-NEXT:    lsls r0, r1, #1
+; DISABLE-V4T-NEXT:  LBB4_5: @ %if.end
+; DISABLE-V4T-NEXT:    pop {r4}
+; DISABLE-V4T-NEXT:    pop {r1}
+; DISABLE-V4T-NEXT:    bx r1
+;
+; DISABLE-V5T-LABEL: loopInfoSaveOutsideLoop:
+; DISABLE-V5T:       @ %bb.0: @ %entry
+; DISABLE-V5T-NEXT:    push {r4, lr}
+; DISABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V5T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V5T-NEXT:    .cfi_offset r4, -8
+; DISABLE-V5T-NEXT:    cmp r0, #0
+; DISABLE-V5T-NEXT:    beq LBB4_4
+; DISABLE-V5T-NEXT:  @ %bb.1: @ %for.preheader
+; DISABLE-V5T-NEXT:    @ InlineAsm Start
+; DISABLE-V5T-NEXT:    mov r8, r8
+; DISABLE-V5T-NEXT:    @ InlineAsm End
+; DISABLE-V5T-NEXT:    movs r0, #0
+; DISABLE-V5T-NEXT:    movs r1, #10
+; DISABLE-V5T-NEXT:  LBB4_2: @ %for.body
+; DISABLE-V5T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; DISABLE-V5T-NEXT:    @ InlineAsm Start
+; DISABLE-V5T-NEXT:    movs r2, #1
+; DISABLE-V5T-NEXT:    @ InlineAsm End
+; DISABLE-V5T-NEXT:    adds r0, r2, r0
+; DISABLE-V5T-NEXT:    subs r1, r1, #1
+; DISABLE-V5T-NEXT:    bne LBB4_2
+; DISABLE-V5T-NEXT:  @ %bb.3: @ %for.end
+; DISABLE-V5T-NEXT:    @ InlineAsm Start
+; DISABLE-V5T-NEXT:    mov r8, r8
+; DISABLE-V5T-NEXT:    @ InlineAsm End
+; DISABLE-V5T-NEXT:    lsls r0, r0, #3
+; DISABLE-V5T-NEXT:    pop {r4, pc}
+; DISABLE-V5T-NEXT:  LBB4_4: @ %if.else
+; DISABLE-V5T-NEXT:    lsls r0, r1, #1
+; DISABLE-V5T-NEXT:    pop {r4, pc}
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %for.preheader
@@ -327,54 +719,156 @@ declare void @somethingElse(...)
 
 ; Check with a more complex case that we do not have restore within the loop and
 ; save outside.
-; CHECK-LABEL: loopInfoRestoreOutsideLoop:
-;
-; ENABLE: cmp r0, #0
-; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; Make sure we save the CSR used in the inline asm: r4.
-; CHECK: push {r4, lr}
-;
-; DISABLE-NEXT: cmp r0, #0
-; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; SUM is in r0 because it is coalesced with the second
-; argument on the else path.
-; CHECK: movs [[SUM:r0]], #0
-; CHECK-NEXT: movs [[IV:r[0-9]+]], #10
-;
-; Next BB.
-; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
-; CHECK: movs [[TMP:r[0-9]+]], #1
-; CHECK: adds [[SUM]], [[TMP]], [[SUM]]
-; CHECK-NEXT: subs [[IV]], [[IV]], #1
-; CHECK-NEXT: bne [[LOOP]]
-;
-; Next BB.
-; SUM << 3.
-; CHECK: lsls [[SUM]], [[SUM]], #3
-; ENABLE-V5T-NEXT: pop {r4, pc}
-; ENABLE-V4T-NEXT: pop {r4}
-; ENABLE-V4T-NEXT: pop {r1}
-; ENABLE-V4T-NEXT: bx r1
-;
-; Duplicated epilogue.
-; DISABLE-V5T: pop {r4, pc}
-; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]]
-;
-; CHECK: [[ELSE_LABEL]]: @ %if.else
-; Shift second argument by one and store into returned register.
-; CHECK: lsls r0, r1, #1
-; DISABLE-V5T-NEXT: pop {r4, pc}
-; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
-; DISABLE-V4T-NEXT: pop {r4}
-; DISABLE-V4T-NEXT: pop {r1}
-; DISABLE-V4T-NEXT: bx r1
-;
-; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
-; ENABLE-NEXT: bx lr
 define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind {
+; ENABLE-V4T-LABEL: loopInfoRestoreOutsideLoop:
+; ENABLE-V4T:       @ %bb.0: @ %entry
+; ENABLE-V4T-NEXT:    cmp r0, #0
+; ENABLE-V4T-NEXT:    beq LBB5_4
+; ENABLE-V4T-NEXT:  @ %bb.1: @ %if.then
+; ENABLE-V4T-NEXT:    push {r4, lr}
+; ENABLE-V4T-NEXT:    @ InlineAsm Start
+; ENABLE-V4T-NEXT:    mov r8, r8
+; ENABLE-V4T-NEXT:    @ InlineAsm End
+; ENABLE-V4T-NEXT:    movs r0, #0
+; ENABLE-V4T-NEXT:    movs r1, #10
+; ENABLE-V4T-NEXT:  LBB5_2: @ %for.body
+; ENABLE-V4T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ENABLE-V4T-NEXT:    @ InlineAsm Start
+; ENABLE-V4T-NEXT:    movs r2, #1
+; ENABLE-V4T-NEXT:    @ InlineAsm End
+; ENABLE-V4T-NEXT:    adds r0, r2, r0
+; ENABLE-V4T-NEXT:    subs r1, r1, #1
+; ENABLE-V4T-NEXT:    bne LBB5_2
+; ENABLE-V4T-NEXT:  @ %bb.3: @ %for.end
+; ENABLE-V4T-NEXT:    lsls r0, r0, #3
+; ENABLE-V4T-NEXT:    pop {r4}
+; ENABLE-V4T-NEXT:    pop {r1}
+; ENABLE-V4T-NEXT:    bx r1
+; ENABLE-V4T-NEXT:  LBB5_4: @ %if.else
+; ENABLE-V4T-NEXT:    lsls r0, r1, #1
+; ENABLE-V4T-NEXT:    bx lr
+; ENABLE-V4T-NEXT:    @ -- End function
+; ENABLE-V4T-NEXT:    .globl _emptyFrame @ -- Begin function emptyFrame
+; ENABLE-V4T-NEXT:    .p2align 1
+; ENABLE-V4T-NEXT:    .code 16 @ @emptyFrame
+; ENABLE-V4T-NEXT:    .thumb_func _emptyFrame
+; ENABLE-V4T-NEXT:  _emptyFrame:
+; ENABLE-V4T-NEXT:    .cfi_startproc
+; ENABLE-V4T-NEXT:  @ %bb.0: @ %entry
+; ENABLE-V4T-NEXT:    movs r0, #0
+; ENABLE-V4T-NEXT:    bx lr
+;
+; ENABLE-V5T-LABEL: loopInfoRestoreOutsideLoop:
+; ENABLE-V5T:       @ %bb.0: @ %entry
+; ENABLE-V5T-NEXT:    cmp r0, #0
+; ENABLE-V5T-NEXT:    beq LBB5_4
+; ENABLE-V5T-NEXT:  @ %bb.1: @ %if.then
+; ENABLE-V5T-NEXT:    push {r4, lr}
+; ENABLE-V5T-NEXT:    @ InlineAsm Start
+; ENABLE-V5T-NEXT:    mov r8, r8
+; ENABLE-V5T-NEXT:    @ InlineAsm End
+; ENABLE-V5T-NEXT:    movs r0, #0
+; ENABLE-V5T-NEXT:    movs r1, #10
+; ENABLE-V5T-NEXT:  LBB5_2: @ %for.body
+; ENABLE-V5T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ENABLE-V5T-NEXT:    @ InlineAsm Start
+; ENABLE-V5T-NEXT:    movs r2, #1
+; ENABLE-V5T-NEXT:    @ InlineAsm End
+; ENABLE-V5T-NEXT:    adds r0, r2, r0
+; ENABLE-V5T-NEXT:    subs r1, r1, #1
+; ENABLE-V5T-NEXT:    bne LBB5_2
+; ENABLE-V5T-NEXT:  @ %bb.3: @ %for.end
+; ENABLE-V5T-NEXT:    lsls r0, r0, #3
+; ENABLE-V5T-NEXT:    pop {r4, pc}
+; ENABLE-V5T-NEXT:  LBB5_4: @ %if.else
+; ENABLE-V5T-NEXT:    lsls r0, r1, #1
+; ENABLE-V5T-NEXT:  LBB5_5: @ %if.end
+; ENABLE-V5T-NEXT:    bx lr
+; ENABLE-V5T-NEXT:    @ -- End function
+; ENABLE-V5T-NEXT:    .globl _emptyFrame @ -- Begin function emptyFrame
+; ENABLE-V5T-NEXT:    .p2align 1
+; ENABLE-V5T-NEXT:    .code 16 @ @emptyFrame
+; ENABLE-V5T-NEXT:    .thumb_func _emptyFrame
+; ENABLE-V5T-NEXT:  _emptyFrame:
+; ENABLE-V5T-NEXT:    .cfi_startproc
+; ENABLE-V5T-NEXT:  @ %bb.0: @ %entry
+; ENABLE-V5T-NEXT:    movs r0, #0
+; ENABLE-V5T-NEXT:    bx lr
+;
+; DISABLE-V4T-LABEL: loopInfoRestoreOutsideLoop:
+; DISABLE-V4T:       @ %bb.0: @ %entry
+; DISABLE-V4T-NEXT:    push {r4, lr}
+; DISABLE-V4T-NEXT:    cmp r0, #0
+; DISABLE-V4T-NEXT:    beq LBB5_4
+; DISABLE-V4T-NEXT:  @ %bb.1: @ %if.then
+; DISABLE-V4T-NEXT:    @ InlineAsm Start
+; DISABLE-V4T-NEXT:    mov r8, r8
+; DISABLE-V4T-NEXT:    @ InlineAsm End
+; DISABLE-V4T-NEXT:    movs r0, #0
+; DISABLE-V4T-NEXT:    movs r1, #10
+; DISABLE-V4T-NEXT:  LBB5_2: @ %for.body
+; DISABLE-V4T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; DISABLE-V4T-NEXT:    @ InlineAsm Start
+; DISABLE-V4T-NEXT:    movs r2, #1
+; DISABLE-V4T-NEXT:    @ InlineAsm End
+; DISABLE-V4T-NEXT:    adds r0, r2, r0
+; DISABLE-V4T-NEXT:    subs r1, r1, #1
+; DISABLE-V4T-NEXT:    bne LBB5_2
+; DISABLE-V4T-NEXT:  @ %bb.3: @ %for.end
+; DISABLE-V4T-NEXT:    lsls r0, r0, #3
+; DISABLE-V4T-NEXT:    b LBB5_5
+; DISABLE-V4T-NEXT:  LBB5_4: @ %if.else
+; DISABLE-V4T-NEXT:    lsls r0, r1, #1
+; DISABLE-V4T-NEXT:  LBB5_5: @ %if.end
+; DISABLE-V4T-NEXT:    pop {r4}
+; DISABLE-V4T-NEXT:    pop {r1}
+; DISABLE-V4T-NEXT:    bx r1
+; DISABLE-V4T-NEXT:    @ -- End function
+; DISABLE-V4T-NEXT:    .globl _emptyFrame @ -- Begin function emptyFrame
+; DISABLE-V4T-NEXT:    .p2align 1
+; DISABLE-V4T-NEXT:    .code 16 @ @emptyFrame
+; DISABLE-V4T-NEXT:    .thumb_func _emptyFrame
+; DISABLE-V4T-NEXT:  _emptyFrame:
+; DISABLE-V4T-NEXT:    .cfi_startproc
+; DISABLE-V4T-NEXT:  @ %bb.0: @ %entry
+; DISABLE-V4T-NEXT:    movs r0, #0
+; DISABLE-V4T-NEXT:    bx lr
+;
+; DISABLE-V5T-LABEL: loopInfoRestoreOutsideLoop:
+; DISABLE-V5T:       @ %bb.0: @ %entry
+; DISABLE-V5T-NEXT:    push {r4, lr}
+; DISABLE-V5T-NEXT:    cmp r0, #0
+; DISABLE-V5T-NEXT:    beq LBB5_4
+; DISABLE-V5T-NEXT:  @ %bb.1: @ %if.then
+; DISABLE-V5T-NEXT:    @ InlineAsm Start
+; DISABLE-V5T-NEXT:    mov r8, r8
+; DISABLE-V5T-NEXT:    @ InlineAsm End
+; DISABLE-V5T-NEXT:    movs r0, #0
+; DISABLE-V5T-NEXT:    movs r1, #10
+; DISABLE-V5T-NEXT:  LBB5_2: @ %for.body
+; DISABLE-V5T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; DISABLE-V5T-NEXT:    @ InlineAsm Start
+; DISABLE-V5T-NEXT:    movs r2, #1
+; DISABLE-V5T-NEXT:    @ InlineAsm End
+; DISABLE-V5T-NEXT:    adds r0, r2, r0
+; DISABLE-V5T-NEXT:    subs r1, r1, #1
+; DISABLE-V5T-NEXT:    bne LBB5_2
+; DISABLE-V5T-NEXT:  @ %bb.3: @ %for.end
+; DISABLE-V5T-NEXT:    lsls r0, r0, #3
+; DISABLE-V5T-NEXT:    pop {r4, pc}
+; DISABLE-V5T-NEXT:  LBB5_4: @ %if.else
+; DISABLE-V5T-NEXT:    lsls r0, r1, #1
+; DISABLE-V5T-NEXT:    pop {r4, pc}
+; DISABLE-V5T-NEXT:    @ -- End function
+; DISABLE-V5T-NEXT:    .globl _emptyFrame @ -- Begin function emptyFrame
+; DISABLE-V5T-NEXT:    .p2align 1
+; DISABLE-V5T-NEXT:    .code 16 @ @emptyFrame
+; DISABLE-V5T-NEXT:    .thumb_func _emptyFrame
+; DISABLE-V5T-NEXT:  _emptyFrame:
+; DISABLE-V5T-NEXT:    .cfi_startproc
+; DISABLE-V5T-NEXT:  @ %bb.0: @ %entry
+; DISABLE-V5T-NEXT:    movs r0, #0
+; DISABLE-V5T-NEXT:    bx lr
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %if.then
@@ -406,59 +900,138 @@ if.end:
 }
 
 ; Check that we handle function with no frame information correctly.
-; CHECK-LABEL: emptyFrame:
-; CHECK: @ %entry
-; CHECK-NEXT: movs r0, #0
-; CHECK-NEXT: bx lr
 define i32 @emptyFrame() {
 entry:
   ret i32 0
 }
 
 ; Check that we handle inline asm correctly.
-; CHECK-LABEL: inlineAsm:
-;
-; ENABLE: cmp r0, #0
-; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; Make sure we save the CSR used in the inline asm: r4.
-; CHECK: push {r4, lr}
-;
-; DISABLE: cmp r0, #0
-; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; CHECK: movs [[IV:r[0-9]+]], #10
-;
-; Next BB.
-; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
-; CHECK: movs r4, #1
-; CHECK: subs [[IV]], [[IV]], #1
-; CHECK-NEXT: bne [[LOOP]]
-;
-; Next BB.
-; CHECK: movs r0, #0
-; ENABLE-V5T-NEXT: pop {r4, pc}
-; ENABLE-V4T-NEXT: pop {r4}
-; ENABLE-V4T-NEXT: pop {r1}
-; ENABLE-V4T-NEXT: bx r1
-;
-; Duplicated epilogue.
-; DISABLE-V5T-NEXT: pop {r4, pc}
-; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]]
-;
-; CHECK: [[ELSE_LABEL]]: @ %if.else
-; Shift second argument by one and store into returned register.
-; CHECK: lsls r0, r1, #1
-; DISABLE-V5T-NEXT: pop {r4, pc}
-; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
-; DISABLE-V4T-NEXT: pop {r4}
-; DISABLE-V4T-NEXT: pop {r1}
-; DISABLE-V4T-NEXT: bx r1
-;
-; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
-; ENABLE-NEXT: bx lr
 define i32 @inlineAsm(i32 %cond, i32 %N) {
+; ENABLE-V4T-LABEL: inlineAsm:
+; ENABLE-V4T:       @ %bb.0: @ %entry
+; ENABLE-V4T-NEXT:    cmp r0, #0
+; ENABLE-V4T-NEXT:    beq LBB7_4
+; ENABLE-V4T-NEXT:  @ %bb.1: @ %for.preheader
+; ENABLE-V4T-NEXT:    push {r4, lr}
+; ENABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V4T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V4T-NEXT:    .cfi_offset r4, -8
+; ENABLE-V4T-NEXT:    @ InlineAsm Start
+; ENABLE-V4T-NEXT:    mov r8, r8
+; ENABLE-V4T-NEXT:    @ InlineAsm End
+; ENABLE-V4T-NEXT:    movs r0, #10
+; ENABLE-V4T-NEXT:  LBB7_2: @ %for.body
+; ENABLE-V4T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ENABLE-V4T-NEXT:    @ InlineAsm Start
+; ENABLE-V4T-NEXT:    movs r4, #1
+; ENABLE-V4T-NEXT:    @ InlineAsm End
+; ENABLE-V4T-NEXT:    subs r0, r0, #1
+; ENABLE-V4T-NEXT:    bne LBB7_2
+; ENABLE-V4T-NEXT:  @ %bb.3: @ %for.exit
+; ENABLE-V4T-NEXT:    @ InlineAsm Start
+; ENABLE-V4T-NEXT:    mov r8, r8
+; ENABLE-V4T-NEXT:    @ InlineAsm End
+; ENABLE-V4T-NEXT:    movs r0, #0
+; ENABLE-V4T-NEXT:    pop {r4}
+; ENABLE-V4T-NEXT:    pop {r1}
+; ENABLE-V4T-NEXT:    bx r1
+; ENABLE-V4T-NEXT:  LBB7_4: @ %if.else
+; ENABLE-V4T-NEXT:    lsls r0, r1, #1
+; ENABLE-V4T-NEXT:    bx lr
+;
+; ENABLE-V5T-LABEL: inlineAsm:
+; ENABLE-V5T:       @ %bb.0: @ %entry
+; ENABLE-V5T-NEXT:    cmp r0, #0
+; ENABLE-V5T-NEXT:    beq LBB7_4
+; ENABLE-V5T-NEXT:  @ %bb.1: @ %for.preheader
+; ENABLE-V5T-NEXT:    push {r4, lr}
+; ENABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V5T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V5T-NEXT:    .cfi_offset r4, -8
+; ENABLE-V5T-NEXT:    @ InlineAsm Start
+; ENABLE-V5T-NEXT:    mov r8, r8
+; ENABLE-V5T-NEXT:    @ InlineAsm End
+; ENABLE-V5T-NEXT:    movs r0, #10
+; ENABLE-V5T-NEXT:  LBB7_2: @ %for.body
+; ENABLE-V5T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; ENABLE-V5T-NEXT:    @ InlineAsm Start
+; ENABLE-V5T-NEXT:    movs r4, #1
+; ENABLE-V5T-NEXT:    @ InlineAsm End
+; ENABLE-V5T-NEXT:    subs r0, r0, #1
+; ENABLE-V5T-NEXT:    bne LBB7_2
+; ENABLE-V5T-NEXT:  @ %bb.3: @ %for.exit
+; ENABLE-V5T-NEXT:    @ InlineAsm Start
+; ENABLE-V5T-NEXT:    mov r8, r8
+; ENABLE-V5T-NEXT:    @ InlineAsm End
+; ENABLE-V5T-NEXT:    movs r0, #0
+; ENABLE-V5T-NEXT:    pop {r4, pc}
+; ENABLE-V5T-NEXT:  LBB7_4: @ %if.else
+; ENABLE-V5T-NEXT:    lsls r0, r1, #1
+; ENABLE-V5T-NEXT:  LBB7_5: @ %if.end
+; ENABLE-V5T-NEXT:    bx lr
+;
+; DISABLE-V4T-LABEL: inlineAsm:
+; DISABLE-V4T:       @ %bb.0: @ %entry
+; DISABLE-V4T-NEXT:    push {r4, lr}
+; DISABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V4T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V4T-NEXT:    .cfi_offset r4, -8
+; DISABLE-V4T-NEXT:    cmp r0, #0
+; DISABLE-V4T-NEXT:    beq LBB7_4
+; DISABLE-V4T-NEXT:  @ %bb.1: @ %for.preheader
+; DISABLE-V4T-NEXT:    @ InlineAsm Start
+; DISABLE-V4T-NEXT:    mov r8, r8
+; DISABLE-V4T-NEXT:    @ InlineAsm End
+; DISABLE-V4T-NEXT:    movs r0, #10
+; DISABLE-V4T-NEXT:  LBB7_2: @ %for.body
+; DISABLE-V4T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; DISABLE-V4T-NEXT:    @ InlineAsm Start
+; DISABLE-V4T-NEXT:    movs r4, #1
+; DISABLE-V4T-NEXT:    @ InlineAsm End
+; DISABLE-V4T-NEXT:    subs r0, r0, #1
+; DISABLE-V4T-NEXT:    bne LBB7_2
+; DISABLE-V4T-NEXT:  @ %bb.3: @ %for.exit
+; DISABLE-V4T-NEXT:    @ InlineAsm Start
+; DISABLE-V4T-NEXT:    mov r8, r8
+; DISABLE-V4T-NEXT:    @ InlineAsm End
+; DISABLE-V4T-NEXT:    movs r0, #0
+; DISABLE-V4T-NEXT:    b LBB7_5
+; DISABLE-V4T-NEXT:  LBB7_4: @ %if.else
+; DISABLE-V4T-NEXT:    lsls r0, r1, #1
+; DISABLE-V4T-NEXT:  LBB7_5: @ %if.end
+; DISABLE-V4T-NEXT:    pop {r4}
+; DISABLE-V4T-NEXT:    pop {r1}
+; DISABLE-V4T-NEXT:    bx r1
+;
+; DISABLE-V5T-LABEL: inlineAsm:
+; DISABLE-V5T:       @ %bb.0: @ %entry
+; DISABLE-V5T-NEXT:    push {r4, lr}
+; DISABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V5T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V5T-NEXT:    .cfi_offset r4, -8
+; DISABLE-V5T-NEXT:    cmp r0, #0
+; DISABLE-V5T-NEXT:    beq LBB7_4
+; DISABLE-V5T-NEXT:  @ %bb.1: @ %for.preheader
+; DISABLE-V5T-NEXT:    @ InlineAsm Start
+; DISABLE-V5T-NEXT:    mov r8, r8
+; DISABLE-V5T-NEXT:    @ InlineAsm End
+; DISABLE-V5T-NEXT:    movs r0, #10
+; DISABLE-V5T-NEXT:  LBB7_2: @ %for.body
+; DISABLE-V5T-NEXT:    @ =>This Inner Loop Header: Depth=1
+; DISABLE-V5T-NEXT:    @ InlineAsm Start
+; DISABLE-V5T-NEXT:    movs r4, #1
+; DISABLE-V5T-NEXT:    @ InlineAsm End
+; DISABLE-V5T-NEXT:    subs r0, r0, #1
+; DISABLE-V5T-NEXT:    bne LBB7_2
+; DISABLE-V5T-NEXT:  @ %bb.3: @ %for.exit
+; DISABLE-V5T-NEXT:    @ InlineAsm Start
+; DISABLE-V5T-NEXT:    mov r8, r8
+; DISABLE-V5T-NEXT:    @ InlineAsm End
+; DISABLE-V5T-NEXT:    movs r0, #0
+; DISABLE-V5T-NEXT:    pop {r4, pc}
+; DISABLE-V5T-NEXT:  LBB7_4: @ %if.else
+; DISABLE-V5T-NEXT:    lsls r0, r1, #1
+; DISABLE-V5T-NEXT:    pop {r4, pc}
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %for.preheader
@@ -488,54 +1061,133 @@ if.end:
 }
 
 ; Check that we handle calls to variadic functions correctly.
-; CHECK-LABEL: callVariadicFunc:
-;
-; ENABLE: cmp r0, #0
-; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; CHECK: push {[[TMP:r[0-9]+]], lr}
-; CHECK: sub sp, #16
-;
-; DISABLE: cmp r0, #0
-; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Setup of the varags.
-; CHECK: str r1, [sp]
-; CHECK-NEXT: str r1, [sp, #4]
-; CHECK-NEXT: str r1, [sp, #8]
-; CHECK:      movs r0, r1
-; CHECK-NEXT: movs r2, r1
-; CHECK-NEXT: movs r3, r1
-; CHECK-NEXT: bl
-; CHECK-NEXT: lsls r0, r0, #3
-;
-; ENABLE-NEXT: add sp, #16
-; ENABLE-V5T-NEXT: pop {[[TMP]], pc}
-; ENABLE-V4T-NEXT: pop {[[TMP]]}
-; ENABLE-V4T-NEXT: pop {r1}
-; ENABLE-V4T-NEXT: bx r1
-;
-; Duplicated epilogue.
-; DISABLE-V5T-NEXT: add sp, #16
-; DISABLE-V5T-NEXT: pop {[[TMP]], pc}
-; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]]
-;
-; CHECK: [[ELSE_LABEL]]: @ %if.else
-; Shift second argument by one and store into returned register.
-; CHECK: lsls r0, r1, #1
-;
-; Epilogue code.
-; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
-; ENABLE-NEXT: bx lr
-;
-; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
-; DISABLE-NEXT: add sp, #16
-; DISABLE-V5T-NEXT: pop {[[TMP]], pc}
-; DISABLE-V4T-NEXT: pop {[[TMP]]}
-; DISABLE-V4T-NEXT: pop {r1}
-; DISABLE-V4T-NEXT: bx r1
 define i32 @callVariadicFunc(i32 %cond, i32 %N) {
+; ENABLE-V4T-LABEL: callVariadicFunc:
+; ENABLE-V4T:       @ %bb.0: @ %entry
+; ENABLE-V4T-NEXT:    cmp r0, #0
+; ENABLE-V4T-NEXT:    beq LBB8_2
+; ENABLE-V4T-NEXT:  @ %bb.1: @ %if.then
+; ENABLE-V4T-NEXT:    push {r4, lr}
+; ENABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V4T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V4T-NEXT:    .cfi_offset r4, -8
+; ENABLE-V4T-NEXT:    sub sp, #16
+; ENABLE-V4T-NEXT:    .cfi_def_cfa_offset 24
+; ENABLE-V4T-NEXT:    str r1, [sp]
+; ENABLE-V4T-NEXT:    str r1, [sp, #4]
+; ENABLE-V4T-NEXT:    str r1, [sp, #8]
+; ENABLE-V4T-NEXT:    ldr r0, LCPI8_0
+; ENABLE-V4T-NEXT:  LPC8_0:
+; ENABLE-V4T-NEXT:    add r0, pc
+; ENABLE-V4T-NEXT:    ldr r4, [r0]
+; ENABLE-V4T-NEXT:    movs r0, r1
+; ENABLE-V4T-NEXT:    movs r2, r1
+; ENABLE-V4T-NEXT:    movs r3, r1
+; ENABLE-V4T-NEXT:    bl Ltmp2
+; ENABLE-V4T-NEXT:    lsls r0, r0, #3
+; ENABLE-V4T-NEXT:    add sp, #16
+; ENABLE-V4T-NEXT:    pop {r4}
+; ENABLE-V4T-NEXT:    pop {r1}
+; ENABLE-V4T-NEXT:    bx r1
+; ENABLE-V4T-NEXT:  LBB8_2: @ %if.else
+; ENABLE-V4T-NEXT:    lsls r0, r1, #1
+; ENABLE-V4T-NEXT:    bx lr
+; ENABLE-V4T-NEXT:    .p2align 2
+; ENABLE-V4T-NEXT:  @ %bb.3:
+; ENABLE-V4T-NEXT:    .data_region
+; ENABLE-V4T-NEXT:  LCPI8_0:
+; ENABLE-V4T-NEXT:    .long L_someVariadicFunc$non_lazy_ptr-(LPC8_0+4)
+; ENABLE-V4T-NEXT:    .end_data_region
+;
+; ENABLE-V5T-LABEL: callVariadicFunc:
+; ENABLE-V5T:       @ %bb.0: @ %entry
+; ENABLE-V5T-NEXT:    cmp r0, #0
+; ENABLE-V5T-NEXT:    beq LBB8_2
+; ENABLE-V5T-NEXT:  @ %bb.1: @ %if.then
+; ENABLE-V5T-NEXT:    push {r7, lr}
+; ENABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V5T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V5T-NEXT:    .cfi_offset r7, -8
+; ENABLE-V5T-NEXT:    sub sp, #16
+; ENABLE-V5T-NEXT:    .cfi_def_cfa_offset 24
+; ENABLE-V5T-NEXT:    str r1, [sp]
+; ENABLE-V5T-NEXT:    str r1, [sp, #4]
+; ENABLE-V5T-NEXT:    str r1, [sp, #8]
+; ENABLE-V5T-NEXT:    movs r0, r1
+; ENABLE-V5T-NEXT:    movs r2, r1
+; ENABLE-V5T-NEXT:    movs r3, r1
+; ENABLE-V5T-NEXT:    bl _someVariadicFunc
+; ENABLE-V5T-NEXT:    lsls r0, r0, #3
+; ENABLE-V5T-NEXT:    add sp, #16
+; ENABLE-V5T-NEXT:    pop {r7, pc}
+; ENABLE-V5T-NEXT:  LBB8_2: @ %if.else
+; ENABLE-V5T-NEXT:    lsls r0, r1, #1
+; ENABLE-V5T-NEXT:  LBB8_3: @ %if.end
+; ENABLE-V5T-NEXT:    bx lr
+;
+; DISABLE-V4T-LABEL: callVariadicFunc:
+; DISABLE-V4T:       @ %bb.0: @ %entry
+; DISABLE-V4T-NEXT:    push {r4, lr}
+; DISABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V4T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V4T-NEXT:    .cfi_offset r4, -8
+; DISABLE-V4T-NEXT:    sub sp, #16
+; DISABLE-V4T-NEXT:    .cfi_def_cfa_offset 24
+; DISABLE-V4T-NEXT:    cmp r0, #0
+; DISABLE-V4T-NEXT:    beq LBB8_2
+; DISABLE-V4T-NEXT:  @ %bb.1: @ %if.then
+; DISABLE-V4T-NEXT:    str r1, [sp]
+; DISABLE-V4T-NEXT:    str r1, [sp, #4]
+; DISABLE-V4T-NEXT:    str r1, [sp, #8]
+; DISABLE-V4T-NEXT:    ldr r0, LCPI8_0
+; DISABLE-V4T-NEXT:  LPC8_0:
+; DISABLE-V4T-NEXT:    add r0, pc
+; DISABLE-V4T-NEXT:    ldr r4, [r0]
+; DISABLE-V4T-NEXT:    movs r0, r1
+; DISABLE-V4T-NEXT:    movs r2, r1
+; DISABLE-V4T-NEXT:    movs r3, r1
+; DISABLE-V4T-NEXT:    bl Ltmp2
+; DISABLE-V4T-NEXT:    lsls r0, r0, #3
+; DISABLE-V4T-NEXT:    b LBB8_3
+; DISABLE-V4T-NEXT:  LBB8_2: @ %if.else
+; DISABLE-V4T-NEXT:    lsls r0, r1, #1
+; DISABLE-V4T-NEXT:  LBB8_3: @ %if.end
+; DISABLE-V4T-NEXT:    add sp, #16
+; DISABLE-V4T-NEXT:    pop {r4}
+; DISABLE-V4T-NEXT:    pop {r1}
+; DISABLE-V4T-NEXT:    bx r1
+; DISABLE-V4T-NEXT:    .p2align 2
+; DISABLE-V4T-NEXT:  @ %bb.4:
+; DISABLE-V4T-NEXT:    .data_region
+; DISABLE-V4T-NEXT:  LCPI8_0:
+; DISABLE-V4T-NEXT:    .long L_someVariadicFunc$non_lazy_ptr-(LPC8_0+4)
+; DISABLE-V4T-NEXT:    .end_data_region
+;
+; DISABLE-V5T-LABEL: callVariadicFunc:
+; DISABLE-V5T:       @ %bb.0: @ %entry
+; DISABLE-V5T-NEXT:    push {r7, lr}
+; DISABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V5T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V5T-NEXT:    .cfi_offset r7, -8
+; DISABLE-V5T-NEXT:    sub sp, #16
+; DISABLE-V5T-NEXT:    .cfi_def_cfa_offset 24
+; DISABLE-V5T-NEXT:    cmp r0, #0
+; DISABLE-V5T-NEXT:    beq LBB8_2
+; DISABLE-V5T-NEXT:  @ %bb.1: @ %if.then
+; DISABLE-V5T-NEXT:    str r1, [sp]
+; DISABLE-V5T-NEXT:    str r1, [sp, #4]
+; DISABLE-V5T-NEXT:    str r1, [sp, #8]
+; DISABLE-V5T-NEXT:    movs r0, r1
+; DISABLE-V5T-NEXT:    movs r2, r1
+; DISABLE-V5T-NEXT:    movs r3, r1
+; DISABLE-V5T-NEXT:    bl _someVariadicFunc
+; DISABLE-V5T-NEXT:    lsls r0, r0, #3
+; DISABLE-V5T-NEXT:    add sp, #16
+; DISABLE-V5T-NEXT:    pop {r7, pc}
+; DISABLE-V5T-NEXT:  LBB8_2: @ %if.else
+; DISABLE-V5T-NEXT:    lsls r0, r1, #1
+; DISABLE-V5T-NEXT:    add sp, #16
+; DISABLE-V5T-NEXT:    pop {r7, pc}
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %if.then
@@ -560,25 +1212,96 @@ declare i32 @someVariadicFunc(i32, ...)
 ; Although this is not incorrect to insert such code, it is useless
 ; and it hurts the binary size.
 ;
-; CHECK-LABEL: noreturn:
-; DISABLE: push
-;
-; CHECK: cmp r0, #0
-; CHECK-NEXT: bne      [[ABORT:LBB[0-9_]+]]
-;
-; CHECK: movs r0, #42
-;
-; ENABLE-NEXT: bx lr
-;
-; DISABLE-NEXT: pop
-;;
-; CHECK: [[ABORT]]: @ %if.abort
-;
-; ENABLE: push
-;
-; CHECK: bl
-; ENABLE-NOT: pop
 define i32 @noreturn(i8 signext %bad_thing) {
+; ENABLE-V4T-LABEL: noreturn:
+; ENABLE-V4T:       @ %bb.0: @ %entry
+; ENABLE-V4T-NEXT:    cmp r0, #0
+; ENABLE-V4T-NEXT:    bne LBB9_2
+; ENABLE-V4T-NEXT:  @ %bb.1: @ %if.end
+; ENABLE-V4T-NEXT:    movs r0, #42
+; ENABLE-V4T-NEXT:    bx lr
+; ENABLE-V4T-NEXT:  LBB9_2: @ %if.abort
+; ENABLE-V4T-NEXT:    push {r4, lr}
+; ENABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V4T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V4T-NEXT:    .cfi_offset r4, -8
+; ENABLE-V4T-NEXT:    ldr r0, LCPI9_0
+; ENABLE-V4T-NEXT:  LPC9_0:
+; ENABLE-V4T-NEXT:    add r0, pc
+; ENABLE-V4T-NEXT:    ldr r0, [r0]
+; ENABLE-V4T-NEXT:    @ InlineAsm Start
+; ENABLE-V4T-NEXT:    movs r1, #1
+; ENABLE-V4T-NEXT:    @ InlineAsm End
+; ENABLE-V4T-NEXT:    bl Ltmp3
+; ENABLE-V4T-NEXT:    .p2align 2
+; ENABLE-V4T-NEXT:  @ %bb.3:
+; ENABLE-V4T-NEXT:    .data_region
+; ENABLE-V4T-NEXT:  LCPI9_0:
+; ENABLE-V4T-NEXT:    .long L_abort$non_lazy_ptr-(LPC9_0+4)
+; ENABLE-V4T-NEXT:    .end_data_region
+;
+; ENABLE-V5T-LABEL: noreturn:
+; ENABLE-V5T:       @ %bb.0: @ %entry
+; ENABLE-V5T-NEXT:    cmp r0, #0
+; ENABLE-V5T-NEXT:    bne LBB9_2
+; ENABLE-V5T-NEXT:  @ %bb.1: @ %if.end
+; ENABLE-V5T-NEXT:    movs r0, #42
+; ENABLE-V5T-NEXT:    bx lr
+; ENABLE-V5T-NEXT:  LBB9_2: @ %if.abort
+; ENABLE-V5T-NEXT:    push {r4, lr}
+; ENABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V5T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V5T-NEXT:    .cfi_offset r4, -8
+; ENABLE-V5T-NEXT:    @ InlineAsm Start
+; ENABLE-V5T-NEXT:    movs r0, #1
+; ENABLE-V5T-NEXT:    @ InlineAsm End
+; ENABLE-V5T-NEXT:    bl _abort
+;
+; DISABLE-V4T-LABEL: noreturn:
+; DISABLE-V4T:       @ %bb.0: @ %entry
+; DISABLE-V4T-NEXT:    push {r4, lr}
+; DISABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V4T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V4T-NEXT:    .cfi_offset r4, -8
+; DISABLE-V4T-NEXT:    cmp r0, #0
+; DISABLE-V4T-NEXT:    bne LBB9_2
+; DISABLE-V4T-NEXT:  @ %bb.1: @ %if.end
+; DISABLE-V4T-NEXT:    movs r0, #42
+; DISABLE-V4T-NEXT:    pop {r4}
+; DISABLE-V4T-NEXT:    pop {r1}
+; DISABLE-V4T-NEXT:    bx r1
+; DISABLE-V4T-NEXT:  LBB9_2: @ %if.abort
+; DISABLE-V4T-NEXT:    ldr r0, LCPI9_0
+; DISABLE-V4T-NEXT:  LPC9_0:
+; DISABLE-V4T-NEXT:    add r0, pc
+; DISABLE-V4T-NEXT:    ldr r0, [r0]
+; DISABLE-V4T-NEXT:    @ InlineAsm Start
+; DISABLE-V4T-NEXT:    movs r1, #1
+; DISABLE-V4T-NEXT:    @ InlineAsm End
+; DISABLE-V4T-NEXT:    bl Ltmp3
+; DISABLE-V4T-NEXT:    .p2align 2
+; DISABLE-V4T-NEXT:  @ %bb.3:
+; DISABLE-V4T-NEXT:    .data_region
+; DISABLE-V4T-NEXT:  LCPI9_0:
+; DISABLE-V4T-NEXT:    .long L_abort$non_lazy_ptr-(LPC9_0+4)
+; DISABLE-V4T-NEXT:    .end_data_region
+;
+; DISABLE-V5T-LABEL: noreturn:
+; DISABLE-V5T:       @ %bb.0: @ %entry
+; DISABLE-V5T-NEXT:    push {r4, lr}
+; DISABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V5T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V5T-NEXT:    .cfi_offset r4, -8
+; DISABLE-V5T-NEXT:    cmp r0, #0
+; DISABLE-V5T-NEXT:    bne LBB9_2
+; DISABLE-V5T-NEXT:  @ %bb.1: @ %if.end
+; DISABLE-V5T-NEXT:    movs r0, #42
+; DISABLE-V5T-NEXT:    pop {r4, pc}
+; DISABLE-V5T-NEXT:  LBB9_2: @ %if.abort
+; DISABLE-V5T-NEXT:    @ InlineAsm Start
+; DISABLE-V5T-NEXT:    movs r0, #1
+; DISABLE-V5T-NEXT:    @ InlineAsm End
+; DISABLE-V5T-NEXT:    bl _abort
 entry:
   %tobool = icmp eq i8 %bad_thing, 0
   br i1 %tobool, label %if.end, label %if.abort
@@ -595,32 +1318,113 @@ if.end:
 declare void @abort() #0
 
 define i32 @b_to_bx(i32 %value) {
-; CHECK-LABEL: b_to_bx:
-; DISABLE: push {r7, lr}
-; CHECK: cmp r0, #49
-; CHECK-NEXT: bgt [[ELSE_LABEL:LBB[0-9_]+]]
-; ENABLE: push {r7, lr}
-
-; CHECK: bl
-; DISABLE-V5-NEXT: pop {r7, pc}
-; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]]
-
-; ENABLE-V5-NEXT: pop {r7, pc}
-; ENABLE-V4-NEXT: pop {r7}
-; ENABLE-V4-NEXT: pop {r1}
-; ENABLE-V4-NEXT: bx r1
-
-; CHECK: [[ELSE_LABEL]]: @ %if.else
-; CHECK-NEXT: lsls r0, r1, #1
-; DISABLE-V5-NEXT: pop {r7, pc}
-; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
-; DISABLE-V4T-NEXT: pop {r7}
-; DISABLE-V4T-NEXT: pop {r1}
-; DISABLE-V4T-NEXT: bx r1
-
-; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
-; ENABLE-NEXT: bx lr
-
+; ENABLE-V4T-LABEL: b_to_bx:
+; ENABLE-V4T:       @ %bb.0: @ %entry
+; ENABLE-V4T-NEXT:    movs r1, r0
+; ENABLE-V4T-NEXT:    cmp r0, #49
+; ENABLE-V4T-NEXT:    bgt LBB10_2
+; ENABLE-V4T-NEXT:  @ %bb.1: @ %if.then
+; ENABLE-V4T-NEXT:    push {r7, lr}
+; ENABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V4T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V4T-NEXT:    .cfi_offset r7, -8
+; ENABLE-V4T-NEXT:    ldr r0, LCPI10_0
+; ENABLE-V4T-NEXT:    ldr r2, LCPI10_1
+; ENABLE-V4T-NEXT:  LPC10_0:
+; ENABLE-V4T-NEXT:    add r2, pc
+; ENABLE-V4T-NEXT:    bl Ltmp4
+; ENABLE-V4T-NEXT:    pop {r7}
+; ENABLE-V4T-NEXT:    pop {r1}
+; ENABLE-V4T-NEXT:    bx r1
+; ENABLE-V4T-NEXT:  LBB10_2: @ %if.else
+; ENABLE-V4T-NEXT:    lsls r0, r1, #1
+; ENABLE-V4T-NEXT:    bx lr
+; ENABLE-V4T-NEXT:    .p2align 2
+; ENABLE-V4T-NEXT:  @ %bb.3:
+; ENABLE-V4T-NEXT:    .data_region
+; ENABLE-V4T-NEXT:  LCPI10_0:
+; ENABLE-V4T-NEXT:    .long 5000 @ 0x1388
+; ENABLE-V4T-NEXT:  LCPI10_1:
+; ENABLE-V4T-NEXT:    .long ___divsi3-(LPC10_0+4)
+; ENABLE-V4T-NEXT:    .end_data_region
+;
+; ENABLE-V5T-LABEL: b_to_bx:
+; ENABLE-V5T:       @ %bb.0: @ %entry
+; ENABLE-V5T-NEXT:    movs r1, r0
+; ENABLE-V5T-NEXT:    cmp r0, #49
+; ENABLE-V5T-NEXT:    bgt LBB10_2
+; ENABLE-V5T-NEXT:  @ %bb.1: @ %if.then
+; ENABLE-V5T-NEXT:    push {r7, lr}
+; ENABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V5T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V5T-NEXT:    .cfi_offset r7, -8
+; ENABLE-V5T-NEXT:    ldr r0, LCPI10_0
+; ENABLE-V5T-NEXT:    bl ___divsi3
+; ENABLE-V5T-NEXT:    pop {r7, pc}
+; ENABLE-V5T-NEXT:  LBB10_2: @ %if.else
+; ENABLE-V5T-NEXT:    lsls r0, r1, #1
+; ENABLE-V5T-NEXT:  LBB10_3: @ %if.end
+; ENABLE-V5T-NEXT:    bx lr
+; ENABLE-V5T-NEXT:    .p2align 2
+; ENABLE-V5T-NEXT:  @ %bb.4:
+; ENABLE-V5T-NEXT:    .data_region
+; ENABLE-V5T-NEXT:  LCPI10_0:
+; ENABLE-V5T-NEXT:    .long 5000 @ 0x1388
+; ENABLE-V5T-NEXT:    .end_data_region
+;
+; DISABLE-V4T-LABEL: b_to_bx:
+; DISABLE-V4T:       @ %bb.0: @ %entry
+; DISABLE-V4T-NEXT:    push {r7, lr}
+; DISABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V4T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V4T-NEXT:    .cfi_offset r7, -8
+; DISABLE-V4T-NEXT:    movs r1, r0
+; DISABLE-V4T-NEXT:    cmp r0, #49
+; DISABLE-V4T-NEXT:    bgt LBB10_2
+; DISABLE-V4T-NEXT:  @ %bb.1: @ %if.then
+; DISABLE-V4T-NEXT:    ldr r0, LCPI10_0
+; DISABLE-V4T-NEXT:    ldr r2, LCPI10_1
+; DISABLE-V4T-NEXT:  LPC10_0:
+; DISABLE-V4T-NEXT:    add r2, pc
+; DISABLE-V4T-NEXT:    bl Ltmp4
+; DISABLE-V4T-NEXT:    b LBB10_3
+; DISABLE-V4T-NEXT:  LBB10_2: @ %if.else
+; DISABLE-V4T-NEXT:    lsls r0, r1, #1
+; DISABLE-V4T-NEXT:  LBB10_3: @ %if.end
+; DISABLE-V4T-NEXT:    pop {r7}
+; DISABLE-V4T-NEXT:    pop {r1}
+; DISABLE-V4T-NEXT:    bx r1
+; DISABLE-V4T-NEXT:    .p2align 2
+; DISABLE-V4T-NEXT:  @ %bb.4:
+; DISABLE-V4T-NEXT:    .data_region
+; DISABLE-V4T-NEXT:  LCPI10_0:
+; DISABLE-V4T-NEXT:    .long 5000 @ 0x1388
+; DISABLE-V4T-NEXT:  LCPI10_1:
+; DISABLE-V4T-NEXT:    .long ___divsi3-(LPC10_0+4)
+; DISABLE-V4T-NEXT:    .end_data_region
+;
+; DISABLE-V5T-LABEL: b_to_bx:
+; DISABLE-V5T:       @ %bb.0: @ %entry
+; DISABLE-V5T-NEXT:    push {r7, lr}
+; DISABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V5T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V5T-NEXT:    .cfi_offset r7, -8
+; DISABLE-V5T-NEXT:    movs r1, r0
+; DISABLE-V5T-NEXT:    cmp r0, #49
+; DISABLE-V5T-NEXT:    bgt LBB10_2
+; DISABLE-V5T-NEXT:  @ %bb.1: @ %if.then
+; DISABLE-V5T-NEXT:    ldr r0, LCPI10_0
+; DISABLE-V5T-NEXT:    bl ___divsi3
+; DISABLE-V5T-NEXT:    pop {r7, pc}
+; DISABLE-V5T-NEXT:  LBB10_2: @ %if.else
+; DISABLE-V5T-NEXT:    lsls r0, r1, #1
+; DISABLE-V5T-NEXT:    pop {r7, pc}
+; DISABLE-V5T-NEXT:    .p2align 2
+; DISABLE-V5T-NEXT:  @ %bb.3:
+; DISABLE-V5T-NEXT:    .data_region
+; DISABLE-V5T-NEXT:  LCPI10_0:
+; DISABLE-V5T-NEXT:    .long 5000 @ 0x1388
+; DISABLE-V5T-NEXT:    .end_data_region
 entry:
   %cmp = icmp slt i32 %value, 50
   br i1 %cmp, label %if.then, label %if.else
@@ -639,29 +1443,93 @@ if.end:
 }
 
 define i1 @beq_to_bx(i32* %y, i32 %head) {
-; CHECK-LABEL: beq_to_bx:
-; DISABLE: push {r4, lr}
-; CHECK: cmp r2, #0
-; CHECK-NEXT: beq [[EXIT_LABEL:LBB[0-9_]+]]
-; ENABLE: push {r4, lr}
-
-; CHECK: lsls    r4, r3, #30
-; ENABLE-NEXT: ldr [[POP:r[4567]]], [sp, #4]
-; ENABLE-NEXT: mov lr, [[POP]]
-; ENABLE-NEXT: pop {[[POP]]}
-; ENABLE-NEXT: add sp, #4
-; CHECK-NEXT: bpl [[EXIT_LABEL]]
-
-; CHECK: str r1, [r2]
-; CHECK: str r3, [r2]
-; CHECK-NEXT: movs r0, #0
-; CHECK-NEXT: [[EXIT_LABEL]]: @ %cleanup
-; ENABLE-NEXT: bx lr
-; DISABLE-V5-NEXT: pop {r4, pc}
-; DISABLE-V4T-NEXT: pop {r4}
-; DISABLE-V4T-NEXT: pop {r1}
-; DISABLE-V4T-NEXT: bx r1
-
+; ENABLE-V4T-LABEL: beq_to_bx:
+; ENABLE-V4T:       @ %bb.0: @ %entry
+; ENABLE-V4T-NEXT:    push {r4, lr}
+; ENABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V4T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V4T-NEXT:    .cfi_offset r4, -8
+; ENABLE-V4T-NEXT:    movs r2, r0
+; ENABLE-V4T-NEXT:    movs r0, #1
+; ENABLE-V4T-NEXT:    cmp r2, #0
+; ENABLE-V4T-NEXT:    beq LBB11_3
+; ENABLE-V4T-NEXT:  @ %bb.1: @ %if.end
+; ENABLE-V4T-NEXT:    ldr r3, [r2]
+; ENABLE-V4T-NEXT:    lsls r4, r3, #30
+; ENABLE-V4T-NEXT:    bpl LBB11_3
+; ENABLE-V4T-NEXT:  @ %bb.2: @ %if.end4
+; ENABLE-V4T-NEXT:    str r1, [r2]
+; ENABLE-V4T-NEXT:    str r3, [r2]
+; ENABLE-V4T-NEXT:    movs r0, #0
+; ENABLE-V4T-NEXT:  LBB11_3: @ %cleanup
+; ENABLE-V4T-NEXT:    pop {r4}
+; ENABLE-V4T-NEXT:    pop {r1}
+; ENABLE-V4T-NEXT:    bx r1
+;
+; ENABLE-V5T-LABEL: beq_to_bx:
+; ENABLE-V5T:       @ %bb.0: @ %entry
+; ENABLE-V5T-NEXT:    push {r4, lr}
+; ENABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V5T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V5T-NEXT:    .cfi_offset r4, -8
+; ENABLE-V5T-NEXT:    movs r2, r0
+; ENABLE-V5T-NEXT:    movs r0, #1
+; ENABLE-V5T-NEXT:    cmp r2, #0
+; ENABLE-V5T-NEXT:    beq LBB11_3
+; ENABLE-V5T-NEXT:  @ %bb.1: @ %if.end
+; ENABLE-V5T-NEXT:    ldr r3, [r2]
+; ENABLE-V5T-NEXT:    lsls r4, r3, #30
+; ENABLE-V5T-NEXT:    bpl LBB11_3
+; ENABLE-V5T-NEXT:  @ %bb.2: @ %if.end4
+; ENABLE-V5T-NEXT:    str r1, [r2]
+; ENABLE-V5T-NEXT:    str r3, [r2]
+; ENABLE-V5T-NEXT:    movs r0, #0
+; ENABLE-V5T-NEXT:  LBB11_3: @ %cleanup
+; ENABLE-V5T-NEXT:    pop {r4, pc}
+;
+; DISABLE-V4T-LABEL: beq_to_bx:
+; DISABLE-V4T:       @ %bb.0: @ %entry
+; DISABLE-V4T-NEXT:    push {r4, lr}
+; DISABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V4T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V4T-NEXT:    .cfi_offset r4, -8
+; DISABLE-V4T-NEXT:    movs r2, r0
+; DISABLE-V4T-NEXT:    movs r0, #1
+; DISABLE-V4T-NEXT:    cmp r2, #0
+; DISABLE-V4T-NEXT:    beq LBB11_3
+; DISABLE-V4T-NEXT:  @ %bb.1: @ %if.end
+; DISABLE-V4T-NEXT:    ldr r3, [r2]
+; DISABLE-V4T-NEXT:    lsls r4, r3, #30
+; DISABLE-V4T-NEXT:    bpl LBB11_3
+; DISABLE-V4T-NEXT:  @ %bb.2: @ %if.end4
+; DISABLE-V4T-NEXT:    str r1, [r2]
+; DISABLE-V4T-NEXT:    str r3, [r2]
+; DISABLE-V4T-NEXT:    movs r0, #0
+; DISABLE-V4T-NEXT:  LBB11_3: @ %cleanup
+; DISABLE-V4T-NEXT:    pop {r4}
+; DISABLE-V4T-NEXT:    pop {r1}
+; DISABLE-V4T-NEXT:    bx r1
+;
+; DISABLE-V5T-LABEL: beq_to_bx:
+; DISABLE-V5T:       @ %bb.0: @ %entry
+; DISABLE-V5T-NEXT:    push {r4, lr}
+; DISABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; DISABLE-V5T-NEXT:    .cfi_offset lr, -4
+; DISABLE-V5T-NEXT:    .cfi_offset r4, -8
+; DISABLE-V5T-NEXT:    movs r2, r0
+; DISABLE-V5T-NEXT:    movs r0, #1
+; DISABLE-V5T-NEXT:    cmp r2, #0
+; DISABLE-V5T-NEXT:    beq LBB11_3
+; DISABLE-V5T-NEXT:  @ %bb.1: @ %if.end
+; DISABLE-V5T-NEXT:    ldr r3, [r2]
+; DISABLE-V5T-NEXT:    lsls r4, r3, #30
+; DISABLE-V5T-NEXT:    bpl LBB11_3
+; DISABLE-V5T-NEXT:  @ %bb.2: @ %if.end4
+; DISABLE-V5T-NEXT:    str r1, [r2]
+; DISABLE-V5T-NEXT:    str r3, [r2]
+; DISABLE-V5T-NEXT:    movs r0, #0
+; DISABLE-V5T-NEXT:  LBB11_3: @ %cleanup
+; DISABLE-V5T-NEXT:    pop {r4, pc}
 entry:
   %cmp = icmp eq i32* %y, null
   br i1 %cmp, label %cleanup, label %if.end

Modified: llvm/trunk/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll Thu Jun 13 06:56:19 2019
@@ -9,6 +9,7 @@
 define i32 @main() nounwind {
 ; CHECK-LABEL: main:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpq {{.*}}(%rip), %rax
 ; CHECK-NEXT:    sbbb %al, %al
@@ -21,7 +22,6 @@ define i32 @main() nounwind {
 ; CHECK-NEXT:  .LBB0_1: # %entry.if.end_crit_edge
 ; CHECK-NEXT:    movl {{.*}}(%rip), %esi
 ; CHECK-NEXT:  .LBB0_3: # %if.end
-; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    movl $.L.str, %edi
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    callq printf

Modified: llvm/trunk/test/CodeGen/X86/MachineSink-eflags.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MachineSink-eflags.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/MachineSink-eflags.ll (original)
+++ llvm/trunk/test/CodeGen/X86/MachineSink-eflags.ll Thu Jun 13 06:56:19 2019
@@ -14,6 +14,7 @@ target triple = "x86_64-pc-linux"
 define void @foo(i8* nocapture %_stubArgs) nounwind {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $152, %rsp
 ; CHECK-NEXT:    movq 48(%rdi), %rax
 ; CHECK-NEXT:    movl 64(%rdi), %edx
 ; CHECK-NEXT:    movl $200, %esi
@@ -29,14 +30,14 @@ define void @foo(i8* nocapture %_stubArg
 ; CHECK-NEXT:    jne .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %entry
 ; CHECK-NEXT:    xorps %xmm1, %xmm1
-; CHECK-NEXT:    jmp .LBB0_3
+; CHECK-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    je .LBB0_4
+; CHECK-NEXT:    jmp .LBB0_5
 ; CHECK-NEXT:  .LBB0_1:
 ; CHECK-NEXT:    movaps (%rax,%rcx), %xmm1
-; CHECK-NEXT:  .LBB0_3: # %entry
-; CHECK-NEXT:    leaq -{{[0-9]+}}(%rsp), %rsp
 ; CHECK-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    jne .LBB0_5
-; CHECK-NEXT:  # %bb.4: # %entry
+; CHECK-NEXT:  .LBB0_4: # %entry
 ; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:  .LBB0_5: # %entry
 ; CHECK-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)

Modified: llvm/trunk/test/CodeGen/X86/cmov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cmov.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cmov.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cmov.ll Thu Jun 13 06:56:19 2019
@@ -78,6 +78,7 @@ define void @test3(i64 %a, i64 %b, i1 %p
 define i1 @test4() nounwind {
 ; CHECK-LABEL: test4:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    movsbl {{.*}}(%rip), %edx
 ; CHECK-NEXT:    movzbl %dl, %ecx
 ; CHECK-NEXT:    shrl $7, %ecx
@@ -90,7 +91,6 @@ define i1 @test4() nounwind {
 ; CHECK-NEXT:  # %bb.1: # %bb.i.i.i
 ; CHECK-NEXT:    movb {{.*}}(%rip), %cl
 ; CHECK-NEXT:  .LBB3_2: # %func_4.exit.i
-; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    setne %bl

Modified: llvm/trunk/test/CodeGen/X86/copy-eflags.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/copy-eflags.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/copy-eflags.ll (original)
+++ llvm/trunk/test/CodeGen/X86/copy-eflags.ll Thu Jun 13 06:56:19 2019
@@ -43,6 +43,7 @@ define i32 @test1() nounwind {
 ;
 ; X64-LABEL: test1:
 ; X64:       # %bb.0: # %entry
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    movb {{.*}}(%rip), %cl
 ; X64-NEXT:    leal 1(%rcx), %eax
 ; X64-NEXT:    movb %al, {{.*}}(%rip)
@@ -56,12 +57,11 @@ define i32 @test1() nounwind {
 ; X64-NEXT:    testb %dl, %dl
 ; X64-NEXT:    jne .LBB0_2
 ; X64-NEXT:  # %bb.1: # %if.then
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    movsbl %al, %edi
 ; X64-NEXT:    callq external
-; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB0_2: # %if.end
 ; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    popq %rcx
 ; X64-NEXT:    retq
 entry:
   %bval = load i8, i8* @b

Modified: llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-2.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-2.ll Thu Jun 13 06:56:19 2019
@@ -17,16 +17,18 @@
 define void @program_1(%struct._image2d_t* %dest, %struct._image2d_t* %t0, <4 x float> %p0, <4 x float> %p1, <4 x float> %p4, <4 x float> %p5, <4 x float> %p6) nounwind {
 ; X32-LABEL: program_1:
 ; X32:       ## %bb.0: ## %entry
+; X32-NEXT:    pushl %esi
+; X32-NEXT:    subl $88, %esp
 ; X32-NEXT:    cmpl $0, 0
 ; X32-NEXT:    jle LBB0_2
 ; X32-NEXT:  ## %bb.1: ## %forcond
 ; X32-NEXT:    cmpl $0, 0
 ; X32-NEXT:    jg LBB0_3
 ; X32-NEXT:  LBB0_2: ## %ifthen
+; X32-NEXT:    addl $88, %esp
+; X32-NEXT:    popl %esi
 ; X32-NEXT:    retl
 ; X32-NEXT:  LBB0_3: ## %forbody
-; X32-NEXT:    pushl %esi
-; X32-NEXT:    subl $88, %esp
 ; X32-NEXT:    movaps {{.*#+}} xmm1 = [1.28E+2,1.28E+2,1.28E+2,1.28E+2]
 ; X32-NEXT:    minps LCPI0_3, %xmm1
 ; X32-NEXT:    cvttps2dq %xmm1, %xmm0
@@ -99,16 +101,18 @@ define void @program_1(%struct._image2d_
 ;
 ; X64-LABEL: program_1:
 ; X64:       ## %bb.0: ## %entry
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    subq $64, %rsp
 ; X64-NEXT:    cmpl $0, 0
 ; X64-NEXT:    jle LBB0_2
 ; X64-NEXT:  ## %bb.1: ## %forcond
 ; X64-NEXT:    cmpl $0, 0
 ; X64-NEXT:    jg LBB0_3
 ; X64-NEXT:  LBB0_2: ## %ifthen
+; X64-NEXT:    addq $64, %rsp
+; X64-NEXT:    popq %rbx
 ; X64-NEXT:    retq
 ; X64-NEXT:  LBB0_3: ## %forbody
-; X64-NEXT:    pushq %rbx
-; X64-NEXT:    subq $64, %rsp
 ; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
 ; X64-NEXT:    movaps {{.*#+}} xmm1 = [1.28E+2,1.28E+2,1.28E+2,1.28E+2]

Modified: llvm/trunk/test/CodeGen/X86/i386-shrink-wrapping.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/i386-shrink-wrapping.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/i386-shrink-wrapping.ll (original)
+++ llvm/trunk/test/CodeGen/X86/i386-shrink-wrapping.ll Thu Jun 13 06:56:19 2019
@@ -1,5 +1,6 @@
-; RUN: llc %s -o - -enable-shrink-wrap=true -no-x86-call-frame-opt | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
-; RUN: llc %s -o - -enable-shrink-wrap=false -no-x86-call-frame-opt | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -enable-shrink-wrap=true -no-x86-call-frame-opt | FileCheck %s --check-prefix=ENABLE
+; RUN: llc %s -o - -enable-shrink-wrap=false -no-x86-call-frame-opt | FileCheck %s --check-prefix=DISABLE
 target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
 target triple = "i386-apple-macosx10.5"
 
@@ -15,53 +16,83 @@ target triple = "i386-apple-macosx10.5"
 ; Check that we are clobbering the flags when they are live-in of the
 ; prologue block and the prologue needs to adjust the stack.
 ; PR25607.
-;
-; CHECK-LABEL: eflagsLiveInPrologue:
-;
-; DISABLE: pushl
-; DISABLE-NEXT: subl $8, %esp
-;
-; CHECK: movl L_a$non_lazy_ptr, [[A:%[a-z]+]]
-; CHECK-NEXT: cmpl $0, ([[A]])
-; CHECK-NEXT: je [[PREHEADER_LABEL:LBB[0-9_]+]]
-;
-; CHECK: movb $1, _d
-;
-; CHECK: [[PREHEADER_LABEL]]:
-; CHECK-NEXT: movl L_b$non_lazy_ptr, [[B:%[a-z]+]]
-; CHECK-NEXT: movl ([[B]]), [[TMP1:%[a-z]+]]
-; CHECK-NEXT: testl [[TMP1]], [[TMP1]]
-; CHECK-NEXT: je  [[FOREND_LABEL:LBB[0-9_]+]]
-;
-; Skip the loop.
-; [...]
-;
-; The for.end block is split to accomadate the different selects.
-; We are interested in the one with the call, so skip until the branch.
-; CHECK: [[FOREND_LABEL]]:
-
-; ENABLE: pushl
-; ENABLE-NEXT: subl $8, %esp
-
-; CHECK: xorl [[CMOVE_VAL:%edx]], [[CMOVE_VAL]]
-; CHECK-NEXT: cmpb $0, _d
-; CHECK-NEXT: movl $6, [[IMM_VAL:%ecx]]
-; The eflags is used in the next instruction.
-; If that instruction disappear, we are not exercising the bug
-; anymore.
-; CHECK-NEXT: cmovnel [[CMOVE_VAL]], [[IMM_VAL]]
-
-; CHECK-NEXT: L_e$non_lazy_ptr, [[E:%[a-z]+]]
-; CHECK-NEXT: movb %cl, ([[E]])
-; CHECK-NEXT: leal 1(%ecx), %esi
 
-; CHECK: calll _varfunc
-; Set the return value to 0.
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: addl $8, %esp
-; CHECK-NEXT: popl
-; CHECK-NEXT: retl
 define i32 @eflagsLiveInPrologue() #0 {
+; ENABLE-LABEL: eflagsLiveInPrologue:
+; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    pushl %esi
+; ENABLE-NEXT:    subl $8, %esp
+; ENABLE-NEXT:    movl L_a$non_lazy_ptr, %eax
+; ENABLE-NEXT:    cmpl $0, (%eax)
+; ENABLE-NEXT:    je LBB0_2
+; ENABLE-NEXT:  ## %bb.1: ## %if.then
+; ENABLE-NEXT:    movb $1, _d
+; ENABLE-NEXT:  LBB0_2: ## %for.cond.preheader
+; ENABLE-NEXT:    movl L_b$non_lazy_ptr, %eax
+; ENABLE-NEXT:    movl (%eax), %eax
+; ENABLE-NEXT:    testl %eax, %eax
+; ENABLE-NEXT:    je LBB0_4
+; ENABLE-NEXT:    .p2align 4, 0x90
+; ENABLE-NEXT:  LBB0_3: ## %for.body
+; ENABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    jmp LBB0_3
+; ENABLE-NEXT:  LBB0_4: ## %for.end
+; ENABLE-NEXT:    xorl %edx, %edx
+; ENABLE-NEXT:    cmpb $0, _d
+; ENABLE-NEXT:    movl $6, %ecx
+; ENABLE-NEXT:    cmovnel %edx, %ecx
+; ENABLE-NEXT:    movl L_e$non_lazy_ptr, %edx
+; ENABLE-NEXT:    movb %cl, (%edx)
+; ENABLE-NEXT:    leal 1(%ecx), %esi
+; ENABLE-NEXT:    cltd
+; ENABLE-NEXT:    idivl %esi
+; ENABLE-NEXT:    movl L_c$non_lazy_ptr, %eax
+; ENABLE-NEXT:    movl %edx, (%eax)
+; ENABLE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; ENABLE-NEXT:    movl $L_.str, (%esp)
+; ENABLE-NEXT:    calll _varfunc
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    addl $8, %esp
+; ENABLE-NEXT:    popl %esi
+; ENABLE-NEXT:    retl
+;
+; DISABLE-LABEL: eflagsLiveInPrologue:
+; DISABLE:       ## %bb.0: ## %entry
+; DISABLE-NEXT:    pushl %esi
+; DISABLE-NEXT:    subl $8, %esp
+; DISABLE-NEXT:    movl L_a$non_lazy_ptr, %eax
+; DISABLE-NEXT:    cmpl $0, (%eax)
+; DISABLE-NEXT:    je LBB0_2
+; DISABLE-NEXT:  ## %bb.1: ## %if.then
+; DISABLE-NEXT:    movb $1, _d
+; DISABLE-NEXT:  LBB0_2: ## %for.cond.preheader
+; DISABLE-NEXT:    movl L_b$non_lazy_ptr, %eax
+; DISABLE-NEXT:    movl (%eax), %eax
+; DISABLE-NEXT:    testl %eax, %eax
+; DISABLE-NEXT:    je LBB0_4
+; DISABLE-NEXT:    .p2align 4, 0x90
+; DISABLE-NEXT:  LBB0_3: ## %for.body
+; DISABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    jmp LBB0_3
+; DISABLE-NEXT:  LBB0_4: ## %for.end
+; DISABLE-NEXT:    xorl %edx, %edx
+; DISABLE-NEXT:    cmpb $0, _d
+; DISABLE-NEXT:    movl $6, %ecx
+; DISABLE-NEXT:    cmovnel %edx, %ecx
+; DISABLE-NEXT:    movl L_e$non_lazy_ptr, %edx
+; DISABLE-NEXT:    movb %cl, (%edx)
+; DISABLE-NEXT:    leal 1(%ecx), %esi
+; DISABLE-NEXT:    cltd
+; DISABLE-NEXT:    idivl %esi
+; DISABLE-NEXT:    movl L_c$non_lazy_ptr, %eax
+; DISABLE-NEXT:    movl %edx, (%eax)
+; DISABLE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; DISABLE-NEXT:    movl $L_.str, (%esp)
+; DISABLE-NEXT:    calll _varfunc
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    addl $8, %esp
+; DISABLE-NEXT:    popl %esi
+; DISABLE-NEXT:    retl
 entry:
   %tmp = load i32, i32* @a, align 4
   %tobool = icmp eq i32 %tmp, 0

Modified: llvm/trunk/test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll Thu Jun 13 06:56:19 2019
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-windows-gnu -exception-model=dwarf < %s | FileCheck %s
 
 %struct.A = type { [4096 x i8] }
@@ -6,6 +7,29 @@
 @b = common global i32 0, align 4
 
 define void @fn1() nounwind uwtable {
+; CHECK-LABEL: fn1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl $4136, %eax # imm = 0x1028
+; CHECK-NEXT:    callq ___chkstk_ms
+; CHECK-NEXT:    subq %rax, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 4144
+; CHECK-NEXT:    movl {{.*}}(%rip), %eax
+; CHECK-NEXT:    testl %eax, %eax
+; CHECK-NEXT:    jne .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %select.true.sink
+; CHECK-NEXT:    cltq
+; CHECK-NEXT:    imulq $715827883, %rax, %rax # imm = 0x2AAAAAAB
+; CHECK-NEXT:    movq %rax, %rcx
+; CHECK-NEXT:    shrq $63, %rcx
+; CHECK-NEXT:    shrq $32, %rax
+; CHECK-NEXT:    addl %ecx, %eax
+; CHECK-NEXT:  .LBB0_2: # %select.end
+; CHECK-NEXT:    movl %eax, {{.*}}(%rip)
+; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT:    # kill: def $ecx killed $ecx killed $rcx
+; CHECK-NEXT:    callq fn2
+; CHECK-NEXT:    addq $4136, %rsp # imm = 0x1028
+; CHECK-NEXT:    retq
 entry:
   %ctx = alloca %struct.A, align 1
   %0 = load i32, i32* @a, align 4
@@ -26,11 +50,3 @@ declare void @fn2(i32)
 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
 
-; CHECK-LABEL: fn1:
-; CHECK: pushq %rax
-; CHECK: movl $4128, %eax
-; CHECK: callq ___chkstk_ms
-; CHECK: subq %rax, %rsp
-; CHECK: movq 4128(%rsp), %rax
-
-; CHECK: addq $4136, %rsp

Modified: llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll Thu Jun 13 06:56:19 2019
@@ -1,6 +1,7 @@
-; RUN: llc %s -o - -enable-shrink-wrap=true -pass-remarks-output=%t | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -enable-shrink-wrap=true -pass-remarks-output=%t | FileCheck %s --check-prefix=ENABLE
 ; RUN: cat %t | FileCheck %s --check-prefix=REMARKS
-; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
+; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=DISABLE
 ;
 ; Note: Lots of tests use inline asm instead of regular calls.
 ; This allows to have a better control on what the allocation will do.
@@ -13,43 +14,38 @@ target triple = "x86_64-apple-macosx"
 
 
 ; Initial motivating example: Simple diamond with a call just on one side.
-; CHECK-LABEL: foo:
-;
-; Compare the arguments and jump to exit.
-; No prologue needed.
-; ENABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
-; ENABLE-NEXT: cmpl %esi, %edi
-; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; (What we push does not matter. It should be some random sratch register.)
-; CHECK: pushq
-;
-; Compare the arguments and jump to exit.
-; After the prologue is set.
-; DISABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
-; DISABLE-NEXT: cmpl %esi, %edi
-; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
-;
-; Store %a in the alloca.
-; CHECK: movl [[ARG0CPY]], 4(%rsp)
-; Set the alloca address in the second argument.
-; CHECK-NEXT: leaq 4(%rsp), %rsi
-; Set the first argument to zero.
-; CHECK-NEXT: xorl %edi, %edi
-; CHECK-NEXT: callq _doSomething
-;
-; With shrink-wrapping, epilogue is just after the call.
-; ENABLE-NEXT: addq $8, %rsp
-;
-; CHECK: [[EXIT_LABEL]]:
-;
-; Without shrink-wrapping, epilogue is in the exit block.
-; Epilogue code. (What we pop does not matter.)
-; DISABLE-NEXT: popq
-;
-; CHECK-NEXT: retq
 define i32 @foo(i32 %a, i32 %b) {
+; ENABLE-LABEL: foo:
+; ENABLE:       ## %bb.0:
+; ENABLE-NEXT:    movl %edi, %eax
+; ENABLE-NEXT:    cmpl %esi, %edi
+; ENABLE-NEXT:    jge LBB0_2
+; ENABLE-NEXT:  ## %bb.1: ## %true
+; ENABLE-NEXT:    pushq %rax
+; ENABLE-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
+; ENABLE-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
+; ENABLE-NEXT:    xorl %edi, %edi
+; ENABLE-NEXT:    callq _doSomething
+; ENABLE-NEXT:    addq $8, %rsp
+; ENABLE-NEXT:  LBB0_2: ## %false
+; ENABLE-NEXT:    retq
+;
+; DISABLE-LABEL: foo:
+; DISABLE:       ## %bb.0:
+; DISABLE-NEXT:    pushq %rax
+; DISABLE-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-NEXT:    movl %edi, %eax
+; DISABLE-NEXT:    cmpl %esi, %edi
+; DISABLE-NEXT:    jge LBB0_2
+; DISABLE-NEXT:  ## %bb.1: ## %true
+; DISABLE-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
+; DISABLE-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
+; DISABLE-NEXT:    xorl %edi, %edi
+; DISABLE-NEXT:    callq _doSomething
+; DISABLE-NEXT:  LBB0_2: ## %false
+; DISABLE-NEXT:    popq %rcx
+; DISABLE-NEXT:    retq
   %tmp = alloca i32, align 4
   %tmp2 = icmp slt i32 %a, %b
   br i1 %tmp2, label %true, label %false
@@ -70,51 +66,69 @@ declare i32 @doSomething(i32, i32*)
 
 ; Check that we do not perform the restore inside the loop whereas the save
 ; is outside.
-; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
-;
-; Shrink-wrapping allows to skip the prologue in the else case.
-; ENABLE: testl %edi, %edi
-; ENABLE: je [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; Make sure we save the CSR used in the inline asm: rbx.
-; CHECK: pushq %rbx
-;
-; DISABLE: testl %edi, %edi
-; DISABLE: je [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; CHECK: xorl [[SUM:%eax]], [[SUM]]
-; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
-;
-; Next BB.
-; CHECK: [[LOOP:LBB[0-9_]+]]: ## %for.body
-; CHECK: movl $1, [[TMP:%e[a-z]+]]
-; CHECK: addl [[TMP]], [[SUM]]
-; CHECK-NEXT: decl [[IV]]
-; CHECK-NEXT: jne [[LOOP]]
-;
-; Next BB.
-; SUM << 3.
-; CHECK: shll $3, [[SUM]]
-;
-; DISABLE: popq
-; DISABLE: retq
-;
-; DISABLE: [[ELSE_LABEL]]: ## %if.else
-; Shift second argument by one in returned register.
-; DISABLE: movl %esi, %eax
-; DISABLE: addl %esi, %eax
-;
-; Epilogue code.
-; CHECK-DAG: popq %rbx
-; CHECK: retq
-;
-; ENABLE: [[ELSE_LABEL]]: ## %if.else
-; Shift second argument by one and store into returned register.
-; ENABLE: movl %esi, %eax
-; ENABLE: addl %esi, %eax
-; ENABLE-NEXT: retq
 define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
+; ENABLE-LABEL: freqSaveAndRestoreOutsideLoop:
+; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    testl %edi, %edi
+; ENABLE-NEXT:    je LBB1_4
+; ENABLE-NEXT:  ## %bb.1: ## %for.preheader
+; ENABLE-NEXT:    pushq %rbx
+; ENABLE-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-NEXT:    .cfi_offset %rbx, -16
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    nop
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    movl $10, %ecx
+; ENABLE-NEXT:    .p2align 4, 0x90
+; ENABLE-NEXT:  LBB1_2: ## %for.body
+; ENABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    movl $1, %edx
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    addl %edx, %eax
+; ENABLE-NEXT:    decl %ecx
+; ENABLE-NEXT:    jne LBB1_2
+; ENABLE-NEXT:  ## %bb.3: ## %for.end
+; ENABLE-NEXT:    shll $3, %eax
+; ENABLE-NEXT:    popq %rbx
+; ENABLE-NEXT:    retq
+; ENABLE-NEXT:  LBB1_4: ## %if.else
+; ENABLE-NEXT:    movl %esi, %eax
+; ENABLE-NEXT:    addl %esi, %eax
+; ENABLE-NEXT:    retq
+;
+; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop:
+; DISABLE:       ## %bb.0: ## %entry
+; DISABLE-NEXT:    pushq %rbx
+; DISABLE-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-NEXT:    .cfi_offset %rbx, -16
+; DISABLE-NEXT:    testl %edi, %edi
+; DISABLE-NEXT:    je LBB1_4
+; DISABLE-NEXT:  ## %bb.1: ## %for.preheader
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    nop
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    movl $10, %ecx
+; DISABLE-NEXT:    .p2align 4, 0x90
+; DISABLE-NEXT:  LBB1_2: ## %for.body
+; DISABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    movl $1, %edx
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    addl %edx, %eax
+; DISABLE-NEXT:    decl %ecx
+; DISABLE-NEXT:    jne LBB1_2
+; DISABLE-NEXT:  ## %bb.3: ## %for.end
+; DISABLE-NEXT:    shll $3, %eax
+; DISABLE-NEXT:    popq %rbx
+; DISABLE-NEXT:    retq
+; DISABLE-NEXT:  LBB1_4: ## %if.else
+; DISABLE-NEXT:    movl %esi, %eax
+; DISABLE-NEXT:    addl %esi, %eax
+; DISABLE-NEXT:    popq %rbx
+; DISABLE-NEXT:    retq
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %for.preheader
@@ -149,25 +163,58 @@ declare i32 @something(...)
 
 ; Check that we do not perform the shrink-wrapping inside the loop even
 ; though that would be legal. The cost model must prevent that.
-; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
-; Prologue code.
-; Make sure we save the CSR used in the inline asm: rbx.
-; CHECK: pushq %rbx
-; CHECK: nop
-; CHECK: xorl [[SUM:%e[a-z]+]], [[SUM]]
-; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
-; Next BB.
-; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
-; CHECK: movl $1, [[TMP:%e[a-z]+]]
-; CHECK: addl [[TMP]], [[SUM]]
-; CHECK-NEXT: decl [[IV]]
-; CHECK-NEXT: jne [[LOOP_LABEL]]
-; Next BB.
-; CHECK: ## %for.exit
-; CHECK: nop
-; CHECK: popq %rbx
-; CHECK-NEXT: retq
 define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
+; ENABLE-LABEL: freqSaveAndRestoreOutsideLoop2:
+; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    pushq %rbx
+; ENABLE-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-NEXT:    .cfi_offset %rbx, -16
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    nop
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    movl $10, %ecx
+; ENABLE-NEXT:    .p2align 4, 0x90
+; ENABLE-NEXT:  LBB2_1: ## %for.body
+; ENABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    movl $1, %edx
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    addl %edx, %eax
+; ENABLE-NEXT:    decl %ecx
+; ENABLE-NEXT:    jne LBB2_1
+; ENABLE-NEXT:  ## %bb.2: ## %for.exit
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    nop
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    popq %rbx
+; ENABLE-NEXT:    retq
+;
+; DISABLE-LABEL: freqSaveAndRestoreOutsideLoop2:
+; DISABLE:       ## %bb.0: ## %entry
+; DISABLE-NEXT:    pushq %rbx
+; DISABLE-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-NEXT:    .cfi_offset %rbx, -16
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    nop
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    movl $10, %ecx
+; DISABLE-NEXT:    .p2align 4, 0x90
+; DISABLE-NEXT:  LBB2_1: ## %for.body
+; DISABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    movl $1, %edx
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    addl %edx, %eax
+; DISABLE-NEXT:    decl %ecx
+; DISABLE-NEXT:    jne LBB2_1
+; DISABLE-NEXT:  ## %bb.2: ## %for.exit
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    nop
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    popq %rbx
+; DISABLE-NEXT:    retq
 entry:
   br label %for.preheader
 
@@ -194,49 +241,75 @@ for.end:
 
 ; Check with a more complex case that we do not have save within the loop and
 ; restore outside.
-; CHECK-LABEL: loopInfoSaveOutsideLoop:
-;
-; ENABLE: testl %edi, %edi
-; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; Make sure we save the CSR used in the inline asm: rbx.
-; CHECK: pushq %rbx
-;
-; DISABLE: testl %edi, %edi
-; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; CHECK: nop
-; CHECK: xorl [[SUM:%eax]], [[SUM]]
-; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
-;
-; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
-; CHECK: movl $1, [[TMP:%e[a-z]+]]
-; CHECK: addl [[TMP]], [[SUM]]
-; CHECK-NEXT: decl [[IV]]
-; CHECK-NEXT: jne [[LOOP_LABEL]]
-; Next BB.
-; CHECK: nop
-; CHECK: shll $3, [[SUM]]
-;
-; DISABLE: popq
-; DISABLE: retq
-;
-; DISABLE: [[ELSE_LABEL]]: ## %if.else
-; Shift second argument by one in returned register.
-; DISABLE: movl %esi, %eax
-; DISABLE: addl %esi, %eax
-;
-; Epilogue code.
-; CHECK-DAG: popq %rbx
-; CHECK: retq
-;
-; ENABLE: [[ELSE_LABEL]]: ## %if.else
-; Shift second argument by one and store into returned register.
-; ENABLE: movl %esi, %eax
-; ENABLE: addl %esi, %eax
-; ENABLE-NEXT: retq
 define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
+; ENABLE-LABEL: loopInfoSaveOutsideLoop:
+; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    testl %edi, %edi
+; ENABLE-NEXT:    je LBB3_4
+; ENABLE-NEXT:  ## %bb.1: ## %for.preheader
+; ENABLE-NEXT:    pushq %rbx
+; ENABLE-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-NEXT:    .cfi_offset %rbx, -16
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    nop
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    movl $10, %ecx
+; ENABLE-NEXT:    .p2align 4, 0x90
+; ENABLE-NEXT:  LBB3_2: ## %for.body
+; ENABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    movl $1, %edx
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    addl %edx, %eax
+; ENABLE-NEXT:    decl %ecx
+; ENABLE-NEXT:    jne LBB3_2
+; ENABLE-NEXT:  ## %bb.3: ## %for.end
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    nop
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    shll $3, %eax
+; ENABLE-NEXT:    popq %rbx
+; ENABLE-NEXT:    retq
+; ENABLE-NEXT:  LBB3_4: ## %if.else
+; ENABLE-NEXT:    movl %esi, %eax
+; ENABLE-NEXT:    addl %esi, %eax
+; ENABLE-NEXT:    retq
+;
+; DISABLE-LABEL: loopInfoSaveOutsideLoop:
+; DISABLE:       ## %bb.0: ## %entry
+; DISABLE-NEXT:    pushq %rbx
+; DISABLE-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-NEXT:    .cfi_offset %rbx, -16
+; DISABLE-NEXT:    testl %edi, %edi
+; DISABLE-NEXT:    je LBB3_4
+; DISABLE-NEXT:  ## %bb.1: ## %for.preheader
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    nop
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    movl $10, %ecx
+; DISABLE-NEXT:    .p2align 4, 0x90
+; DISABLE-NEXT:  LBB3_2: ## %for.body
+; DISABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    movl $1, %edx
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    addl %edx, %eax
+; DISABLE-NEXT:    decl %ecx
+; DISABLE-NEXT:    jne LBB3_2
+; DISABLE-NEXT:  ## %bb.3: ## %for.end
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    nop
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    shll $3, %eax
+; DISABLE-NEXT:    popq %rbx
+; DISABLE-NEXT:    retq
+; DISABLE-NEXT:  LBB3_4: ## %if.else
+; DISABLE-NEXT:    movl %esi, %eax
+; DISABLE-NEXT:    addl %esi, %eax
+; DISABLE-NEXT:    popq %rbx
+; DISABLE-NEXT:    retq
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %for.preheader
@@ -270,49 +343,65 @@ if.end:
 
 ; Check with a more complex case that we do not have restore within the loop and
 ; save outside.
-; CHECK-LABEL: loopInfoRestoreOutsideLoop:
-;
-; ENABLE: testl %edi, %edi
-; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; Make sure we save the CSR used in the inline asm: rbx.
-; CHECK: pushq %rbx
-;
-; DISABLE: testl %edi, %edi
-; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; CHECK: nop
-; CHECK: xorl [[SUM:%eax]], [[SUM]]
-; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
-;
-; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
-; CHECK: movl $1, [[TMP:%e[a-z]+]]
-; CHECK: addl [[TMP]], [[SUM]]
-; CHECK-NEXT: decl [[IV]]
-; CHECK-NEXT: jne [[LOOP_LABEL]]
-; Next BB.
-; CHECK: shll $3, [[SUM]]
-;
-; DISABLE: popq
-; DISABLE: retq
-;
-; DISABLE: [[ELSE_LABEL]]: ## %if.else
-
-; Shift second argument by one in returned register.
-; DISABLE: movl %esi, %eax
-; DISABLE: addl %esi, %eax
-;
-; Epilogue code.
-; CHECK-DAG: popq %rbx
-; CHECK: retq
-;
-; ENABLE: [[ELSE_LABEL]]: ## %if.else
-; Shift second argument by one and store into returned register.
-; ENABLE: movl %esi, %eax
-; ENABLE: addl %esi, %eax
-; ENABLE-NEXT: retq
 define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind {
+; ENABLE-LABEL: loopInfoRestoreOutsideLoop:
+; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    testl %edi, %edi
+; ENABLE-NEXT:    je LBB4_4
+; ENABLE-NEXT:  ## %bb.1: ## %if.then
+; ENABLE-NEXT:    pushq %rbx
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    nop
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    movl $10, %ecx
+; ENABLE-NEXT:    .p2align 4, 0x90
+; ENABLE-NEXT:  LBB4_2: ## %for.body
+; ENABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    movl $1, %edx
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    addl %edx, %eax
+; ENABLE-NEXT:    decl %ecx
+; ENABLE-NEXT:    jne LBB4_2
+; ENABLE-NEXT:  ## %bb.3: ## %for.end
+; ENABLE-NEXT:    shll $3, %eax
+; ENABLE-NEXT:    popq %rbx
+; ENABLE-NEXT:    retq
+; ENABLE-NEXT:  LBB4_4: ## %if.else
+; ENABLE-NEXT:    movl %esi, %eax
+; ENABLE-NEXT:    addl %esi, %eax
+; ENABLE-NEXT:    retq
+;
+; DISABLE-LABEL: loopInfoRestoreOutsideLoop:
+; DISABLE:       ## %bb.0: ## %entry
+; DISABLE-NEXT:    pushq %rbx
+; DISABLE-NEXT:    testl %edi, %edi
+; DISABLE-NEXT:    je LBB4_4
+; DISABLE-NEXT:  ## %bb.1: ## %if.then
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    nop
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    movl $10, %ecx
+; DISABLE-NEXT:    .p2align 4, 0x90
+; DISABLE-NEXT:  LBB4_2: ## %for.body
+; DISABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    movl $1, %edx
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    addl %edx, %eax
+; DISABLE-NEXT:    decl %ecx
+; DISABLE-NEXT:    jne LBB4_2
+; DISABLE-NEXT:  ## %bb.3: ## %for.end
+; DISABLE-NEXT:    shll $3, %eax
+; DISABLE-NEXT:    popq %rbx
+; DISABLE-NEXT:    retq
+; DISABLE-NEXT:  LBB4_4: ## %if.else
+; DISABLE-NEXT:    movl %esi, %eax
+; DISABLE-NEXT:    addl %esi, %eax
+; DISABLE-NEXT:    popq %rbx
+; DISABLE-NEXT:    retq
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %if.then
@@ -344,58 +433,86 @@ if.end:
 }
 
 ; Check that we handle function with no frame information correctly.
-; CHECK-LABEL: emptyFrame:
-; CHECK: ## %entry
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: retq
 define i32 @emptyFrame() {
+; ENABLE-LABEL: emptyFrame:
+; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    retq
+;
+; DISABLE-LABEL: emptyFrame:
+; DISABLE:       ## %bb.0: ## %entry
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    retq
 entry:
   ret i32 0
 }
 
 ; Check that we handle inline asm correctly.
-; CHECK-LABEL: inlineAsm:
-;
-; ENABLE: testl %edi, %edi
-; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; Make sure we save the CSR used in the inline asm: rbx.
-; CHECK: pushq %rbx
-;
-; DISABLE: testl %edi, %edi
-; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; CHECK: nop
-; CHECK: movl $10, [[IV:%e[a-z]+]]
-;
-; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
-; Inline asm statement.
-; CHECK: addl $1, %ebx
-; CHECK: decl [[IV]]
-; CHECK-NEXT: jne [[LOOP_LABEL]]
-; Next BB.
-; CHECK: nop
-; CHECK: xorl %eax, %eax
-;
-; DISABLE: popq
-; DISABLE: retq
-;
-; DISABLE: [[ELSE_LABEL]]: ## %if.else
-; Shift second argument by one in returned register.
-; DISABLE: movl %esi, %eax
-; DISABLE: addl %esi, %eax
-;
-; Epilogue code.
-; CHECK-DAG: popq %rbx
-; CHECK: retq
-;
-; ENABLE: [[ELSE_LABEL]]: ## %if.else
-; Shift second argument by one and store into returned register.
-; ENABLE: movl %esi, %eax
-; ENABLE: addl %esi, %eax
-; ENABLE-NEXT: retq
 define i32 @inlineAsm(i32 %cond, i32 %N) {
+; ENABLE-LABEL: inlineAsm:
+; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    testl %edi, %edi
+; ENABLE-NEXT:    je LBB6_4
+; ENABLE-NEXT:  ## %bb.1: ## %for.preheader
+; ENABLE-NEXT:    pushq %rbx
+; ENABLE-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-NEXT:    .cfi_offset %rbx, -16
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    nop
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    movl $10, %eax
+; ENABLE-NEXT:    .p2align 4, 0x90
+; ENABLE-NEXT:  LBB6_2: ## %for.body
+; ENABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    addl $1, %ebx
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    decl %eax
+; ENABLE-NEXT:    jne LBB6_2
+; ENABLE-NEXT:  ## %bb.3: ## %for.exit
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    nop
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    popq %rbx
+; ENABLE-NEXT:    retq
+; ENABLE-NEXT:  LBB6_4: ## %if.else
+; ENABLE-NEXT:    movl %esi, %eax
+; ENABLE-NEXT:    addl %esi, %eax
+; ENABLE-NEXT:    retq
+;
+; DISABLE-LABEL: inlineAsm:
+; DISABLE:       ## %bb.0: ## %entry
+; DISABLE-NEXT:    pushq %rbx
+; DISABLE-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-NEXT:    .cfi_offset %rbx, -16
+; DISABLE-NEXT:    testl %edi, %edi
+; DISABLE-NEXT:    je LBB6_4
+; DISABLE-NEXT:  ## %bb.1: ## %for.preheader
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    nop
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    movl $10, %eax
+; DISABLE-NEXT:    .p2align 4, 0x90
+; DISABLE-NEXT:  LBB6_2: ## %for.body
+; DISABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    addl $1, %ebx
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    decl %eax
+; DISABLE-NEXT:    jne LBB6_2
+; DISABLE-NEXT:  ## %bb.3: ## %for.exit
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    nop
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    popq %rbx
+; DISABLE-NEXT:    retq
+; DISABLE-NEXT:  LBB6_4: ## %if.else
+; DISABLE-NEXT:    movl %esi, %eax
+; DISABLE-NEXT:    addl %esi, %eax
+; DISABLE-NEXT:    popq %rbx
+; DISABLE-NEXT:    retq
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %for.preheader
@@ -425,43 +542,55 @@ if.end:
 }
 
 ; Check that we handle calls to variadic functions correctly.
-; CHECK-LABEL: callVariadicFunc:
-;
-; ENABLE: movl %esi, %eax
-; ENABLE: testl %edi, %edi
-; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; CHECK: pushq
-;
-; DISABLE: movl %esi, %eax
-; DISABLE: testl %edi, %edi
-; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; Setup of the varags.
-; CHECK:       movl	%eax, (%rsp)
-; CHECK-NEXT:  movl	%eax, %edi
-; CHECK-NEXT:  movl	%eax, %esi
-; CHECK-NEXT:  movl	%eax, %edx
-; CHECK-NEXT:  movl	%eax, %ecx
-; CHECK-NEXT:  movl	%eax, %r8d
-; CHECK-NEXT:  movl	%eax, %r9d
-; CHECK-NEXT:  xorl	%eax, %eax
-; CHECK-NEXT: callq _someVariadicFunc
-; CHECK-NEXT: shll $3, %eax
-;
-; ENABLE-NEXT: addq $8, %rsp
-; ENABLE-NEXT: retq
-;
-
-; CHECK: [[ELSE_LABEL]]: ## %if.else
-; Shift second argument by one and store into returned register.
-; CHECK: addl %eax, %eax
-;
-; Epilogue code.
-; DISABLE-NEXT: popq
-; CHECK-NEXT: retq
 define i32 @callVariadicFunc(i32 %cond, i32 %N) {
+; ENABLE-LABEL: callVariadicFunc:
+; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    movl %esi, %eax
+; ENABLE-NEXT:    testl %edi, %edi
+; ENABLE-NEXT:    je LBB7_2
+; ENABLE-NEXT:  ## %bb.1: ## %if.then
+; ENABLE-NEXT:    pushq %rax
+; ENABLE-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-NEXT:    movl %eax, (%rsp)
+; ENABLE-NEXT:    movl %eax, %edi
+; ENABLE-NEXT:    movl %eax, %esi
+; ENABLE-NEXT:    movl %eax, %edx
+; ENABLE-NEXT:    movl %eax, %ecx
+; ENABLE-NEXT:    movl %eax, %r8d
+; ENABLE-NEXT:    movl %eax, %r9d
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    callq _someVariadicFunc
+; ENABLE-NEXT:    shll $3, %eax
+; ENABLE-NEXT:    addq $8, %rsp
+; ENABLE-NEXT:    retq
+; ENABLE-NEXT:  LBB7_2: ## %if.else
+; ENABLE-NEXT:    addl %eax, %eax
+; ENABLE-NEXT:    retq
+;
+; DISABLE-LABEL: callVariadicFunc:
+; DISABLE:       ## %bb.0: ## %entry
+; DISABLE-NEXT:    pushq %rax
+; DISABLE-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-NEXT:    movl %esi, %eax
+; DISABLE-NEXT:    testl %edi, %edi
+; DISABLE-NEXT:    je LBB7_2
+; DISABLE-NEXT:  ## %bb.1: ## %if.then
+; DISABLE-NEXT:    movl %eax, (%rsp)
+; DISABLE-NEXT:    movl %eax, %edi
+; DISABLE-NEXT:    movl %eax, %esi
+; DISABLE-NEXT:    movl %eax, %edx
+; DISABLE-NEXT:    movl %eax, %ecx
+; DISABLE-NEXT:    movl %eax, %r8d
+; DISABLE-NEXT:    movl %eax, %r9d
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    callq _someVariadicFunc
+; DISABLE-NEXT:    shll $3, %eax
+; DISABLE-NEXT:    popq %rcx
+; DISABLE-NEXT:    retq
+; DISABLE-NEXT:  LBB7_2: ## %if.else
+; DISABLE-NEXT:    addl %eax, %eax
+; DISABLE-NEXT:    popq %rcx
+; DISABLE-NEXT:    retq
 entry:
   %tobool = icmp eq i32 %cond, 0
   br i1 %tobool, label %if.else, label %if.then
@@ -507,47 +636,80 @@ declare i32 @someVariadicFunc(i32, ...)
 
 declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* readonly)
 
-; CHECK-LABEL: useLEA:
-; DISABLE: pushq
-;
-; CHECK: testq   %rdi, %rdi
-; CHECK-NEXT: je      [[CLEANUP:LBB[0-9_]+]]
-;
-; CHECK: cmpw $66, (%rdi)
-; CHECK-NEXT: jne [[CLEANUP]]
-;
-; CHECK: movq 8(%rdi), %rdi
-; CHECK-NEXT: movzwl (%rdi), %e[[BF_LOAD2:[a-z]+]]
-; CHECK-NEXT: leal -54(%r[[BF_LOAD2]]), [[TMP:%e[a-z]+]]
-; CHECK-NEXT: cmpl $14, [[TMP]]
-; CHECK-NEXT: ja [[LOR_LHS_FALSE:LBB[0-9_]+]]
-;
-; CHECK: movl $24599, [[TMP2:%e[a-z]+]]
-; CHECK-NEXT: btl [[TMP]], [[TMP2]]
-; CHECK-NEXT: jae [[LOR_LHS_FALSE:LBB[0-9_]+]]
-;
-; CHECK: [[CLEANUP]]: ## %cleanup
-; DISABLE: popq
-; CHECK-NEXT: retq
-;
-; CHECK: [[LOR_LHS_FALSE]]: ## %lor.lhs.false
-; CHECK: cmpl $134, %e[[BF_LOAD2]]
-; CHECK-NEXT: je [[CLEANUP]]
-;
-; CHECK: cmpl $140, %e[[BF_LOAD2]]
-; CHECK-NEXT: je [[CLEANUP]]
-;
-; ENABLE: pushq
-; CHECK: callq _find_temp_slot_from_address
-; CHECK-NEXT: testq   %rax, %rax
-;
-; The adjustment must use LEA here (or be moved above the test).
-; ENABLE-NEXT: leaq 8(%rsp), %rsp
-;
-; CHECK-NEXT: je [[CLEANUP]]
-;
-; CHECK: movb $1, 57(%rax)
 define void @useLEA(%struct.rtx_def* readonly %x) {
+; ENABLE-LABEL: useLEA:
+; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    pushq %rax
+; ENABLE-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-NEXT:    testq %rdi, %rdi
+; ENABLE-NEXT:    je LBB8_7
+; ENABLE-NEXT:  ## %bb.1: ## %if.end
+; ENABLE-NEXT:    cmpw $66, (%rdi)
+; ENABLE-NEXT:    jne LBB8_7
+; ENABLE-NEXT:  ## %bb.2: ## %lor.lhs.false
+; ENABLE-NEXT:    movq 8(%rdi), %rdi
+; ENABLE-NEXT:    movzwl (%rdi), %eax
+; ENABLE-NEXT:    leal -54(%rax), %ecx
+; ENABLE-NEXT:    cmpl $14, %ecx
+; ENABLE-NEXT:    ja LBB8_3
+; ENABLE-NEXT:  ## %bb.8: ## %lor.lhs.false
+; ENABLE-NEXT:    movl $24599, %edx ## imm = 0x6017
+; ENABLE-NEXT:    btl %ecx, %edx
+; ENABLE-NEXT:    jae LBB8_3
+; ENABLE-NEXT:  LBB8_7: ## %cleanup
+; ENABLE-NEXT:    popq %rax
+; ENABLE-NEXT:    retq
+; ENABLE-NEXT:  LBB8_3: ## %lor.lhs.false
+; ENABLE-NEXT:    cmpl $134, %eax
+; ENABLE-NEXT:    je LBB8_7
+; ENABLE-NEXT:  ## %bb.4: ## %lor.lhs.false
+; ENABLE-NEXT:    cmpl $140, %eax
+; ENABLE-NEXT:    je LBB8_7
+; ENABLE-NEXT:  ## %bb.5: ## %if.end.55
+; ENABLE-NEXT:    callq _find_temp_slot_from_address
+; ENABLE-NEXT:    testq %rax, %rax
+; ENABLE-NEXT:    je LBB8_7
+; ENABLE-NEXT:  ## %bb.6: ## %if.then.60
+; ENABLE-NEXT:    movb $1, 57(%rax)
+; ENABLE-NEXT:    popq %rax
+; ENABLE-NEXT:    retq
+;
+; DISABLE-LABEL: useLEA:
+; DISABLE:       ## %bb.0: ## %entry
+; DISABLE-NEXT:    pushq %rax
+; DISABLE-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-NEXT:    testq %rdi, %rdi
+; DISABLE-NEXT:    je LBB8_7
+; DISABLE-NEXT:  ## %bb.1: ## %if.end
+; DISABLE-NEXT:    cmpw $66, (%rdi)
+; DISABLE-NEXT:    jne LBB8_7
+; DISABLE-NEXT:  ## %bb.2: ## %lor.lhs.false
+; DISABLE-NEXT:    movq 8(%rdi), %rdi
+; DISABLE-NEXT:    movzwl (%rdi), %eax
+; DISABLE-NEXT:    leal -54(%rax), %ecx
+; DISABLE-NEXT:    cmpl $14, %ecx
+; DISABLE-NEXT:    ja LBB8_3
+; DISABLE-NEXT:  ## %bb.8: ## %lor.lhs.false
+; DISABLE-NEXT:    movl $24599, %edx ## imm = 0x6017
+; DISABLE-NEXT:    btl %ecx, %edx
+; DISABLE-NEXT:    jae LBB8_3
+; DISABLE-NEXT:  LBB8_7: ## %cleanup
+; DISABLE-NEXT:    popq %rax
+; DISABLE-NEXT:    retq
+; DISABLE-NEXT:  LBB8_3: ## %lor.lhs.false
+; DISABLE-NEXT:    cmpl $134, %eax
+; DISABLE-NEXT:    je LBB8_7
+; DISABLE-NEXT:  ## %bb.4: ## %lor.lhs.false
+; DISABLE-NEXT:    cmpl $140, %eax
+; DISABLE-NEXT:    je LBB8_7
+; DISABLE-NEXT:  ## %bb.5: ## %if.end.55
+; DISABLE-NEXT:    callq _find_temp_slot_from_address
+; DISABLE-NEXT:    testq %rax, %rax
+; DISABLE-NEXT:    je LBB8_7
+; DISABLE-NEXT:  ## %bb.6: ## %if.then.60
+; DISABLE-NEXT:    movb $1, 57(%rax)
+; DISABLE-NEXT:    popq %rax
+; DISABLE-NEXT:    retq
 entry:
   %cmp = icmp eq %struct.rtx_def* %x, null
   br i1 %cmp, label %cleanup, label %if.end
@@ -594,26 +756,31 @@ cleanup:
 ; Make sure we do not insert unreachable code after noreturn function.
 ; Although this is not incorrect to insert such code, it is useless
 ; and it hurts the binary size.
-;
-; CHECK-LABEL: noreturn:
-; DISABLE: pushq
-;
-; CHECK: testb   %dil, %dil
-; CHECK-NEXT: jne      [[ABORT:LBB[0-9_]+]]
-;
-; CHECK: movl $42, %eax
-;
-; DISABLE-NEXT: popq
-;
-; CHECK-NEXT: retq
-;
-; CHECK: [[ABORT]]: ## %if.abort
-;
-; ENABLE: pushq
-;
-; CHECK: callq _abort
-; ENABLE-NOT: popq
 define i32 @noreturn(i8 signext %bad_thing) {
+; ENABLE-LABEL: noreturn:
+; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    testb %dil, %dil
+; ENABLE-NEXT:    jne LBB9_2
+; ENABLE-NEXT:  ## %bb.1: ## %if.end
+; ENABLE-NEXT:    movl $42, %eax
+; ENABLE-NEXT:    retq
+; ENABLE-NEXT:  LBB9_2: ## %if.abort
+; ENABLE-NEXT:    pushq %rax
+; ENABLE-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-NEXT:    callq _abort
+;
+; DISABLE-LABEL: noreturn:
+; DISABLE:       ## %bb.0: ## %entry
+; DISABLE-NEXT:    pushq %rax
+; DISABLE-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-NEXT:    testb %dil, %dil
+; DISABLE-NEXT:    jne LBB9_2
+; DISABLE-NEXT:  ## %bb.1: ## %if.end
+; DISABLE-NEXT:    movl $42, %eax
+; DISABLE-NEXT:    popq %rcx
+; DISABLE-NEXT:    retq
+; DISABLE-NEXT:  LBB9_2: ## %if.abort
+; DISABLE-NEXT:    callq _abort
 entry:
   %tobool = icmp eq i8 %bad_thing, 0
   br i1 %tobool, label %if.end, label %if.abort
@@ -639,9 +806,70 @@ attributes #0 = { noreturn nounwind }
 ; should return gracefully and continue compilation.
 ; The only condition for this test is the compilation finishes correctly.
 ;
-; CHECK-LABEL: infiniteloop
-; CHECK: retq
 define void @infiniteloop() {
+; ENABLE-LABEL: infiniteloop:
+; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    pushq %rbp
+; ENABLE-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-NEXT:    .cfi_offset %rbp, -16
+; ENABLE-NEXT:    movq %rsp, %rbp
+; ENABLE-NEXT:    .cfi_def_cfa_register %rbp
+; ENABLE-NEXT:    pushq %rbx
+; ENABLE-NEXT:    pushq %rax
+; ENABLE-NEXT:    .cfi_offset %rbx, -24
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    testb %al, %al
+; ENABLE-NEXT:    jne LBB10_3
+; ENABLE-NEXT:  ## %bb.1: ## %if.then
+; ENABLE-NEXT:    movq %rsp, %rcx
+; ENABLE-NEXT:    addq $-16, %rcx
+; ENABLE-NEXT:    movq %rcx, %rsp
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    movl $1, %edx
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    .p2align 4, 0x90
+; ENABLE-NEXT:  LBB10_2: ## %for.body
+; ENABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    addl %edx, %eax
+; ENABLE-NEXT:    movl %eax, (%rcx)
+; ENABLE-NEXT:    jmp LBB10_2
+; ENABLE-NEXT:  LBB10_3: ## %if.end
+; ENABLE-NEXT:    leaq -8(%rbp), %rsp
+; ENABLE-NEXT:    popq %rbx
+; ENABLE-NEXT:    popq %rbp
+; ENABLE-NEXT:    retq
+;
+; DISABLE-LABEL: infiniteloop:
+; DISABLE:       ## %bb.0: ## %entry
+; DISABLE-NEXT:    pushq %rbp
+; DISABLE-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-NEXT:    .cfi_offset %rbp, -16
+; DISABLE-NEXT:    movq %rsp, %rbp
+; DISABLE-NEXT:    .cfi_def_cfa_register %rbp
+; DISABLE-NEXT:    pushq %rbx
+; DISABLE-NEXT:    pushq %rax
+; DISABLE-NEXT:    .cfi_offset %rbx, -24
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    testb %al, %al
+; DISABLE-NEXT:    jne LBB10_3
+; DISABLE-NEXT:  ## %bb.1: ## %if.then
+; DISABLE-NEXT:    movq %rsp, %rcx
+; DISABLE-NEXT:    addq $-16, %rcx
+; DISABLE-NEXT:    movq %rcx, %rsp
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    movl $1, %edx
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    .p2align 4, 0x90
+; DISABLE-NEXT:  LBB10_2: ## %for.body
+; DISABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    addl %edx, %eax
+; DISABLE-NEXT:    movl %eax, (%rcx)
+; DISABLE-NEXT:    jmp LBB10_2
+; DISABLE-NEXT:  LBB10_3: ## %if.end
+; DISABLE-NEXT:    leaq -8(%rbp), %rsp
+; DISABLE-NEXT:    popq %rbx
+; DISABLE-NEXT:    popq %rbp
+; DISABLE-NEXT:    retq
 entry:
   br i1 undef, label %if.then, label %if.end
 
@@ -661,9 +889,102 @@ if.end:
 }
 
 ; Another infinite loop test this time with a body bigger than just one block.
-; CHECK-LABEL: infiniteloop2
-; CHECK: retq
 define void @infiniteloop2() {
+; ENABLE-LABEL: infiniteloop2:
+; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    pushq %rbp
+; ENABLE-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-NEXT:    .cfi_offset %rbp, -16
+; ENABLE-NEXT:    movq %rsp, %rbp
+; ENABLE-NEXT:    .cfi_def_cfa_register %rbp
+; ENABLE-NEXT:    pushq %rbx
+; ENABLE-NEXT:    pushq %rax
+; ENABLE-NEXT:    .cfi_offset %rbx, -24
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    testb %al, %al
+; ENABLE-NEXT:    jne LBB11_5
+; ENABLE-NEXT:  ## %bb.1: ## %if.then
+; ENABLE-NEXT:    movq %rsp, %rcx
+; ENABLE-NEXT:    addq $-16, %rcx
+; ENABLE-NEXT:    movq %rcx, %rsp
+; ENABLE-NEXT:    xorl %edx, %edx
+; ENABLE-NEXT:    jmp LBB11_2
+; ENABLE-NEXT:    .p2align 4, 0x90
+; ENABLE-NEXT:  LBB11_4: ## %body2
+; ENABLE-NEXT:    ## in Loop: Header=BB11_2 Depth=1
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    nop
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    movl $1, %edx
+; ENABLE-NEXT:  LBB11_2: ## %for.body
+; ENABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    movl %edx, %esi
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    movl $1, %edx
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    addl %esi, %edx
+; ENABLE-NEXT:    movl %edx, (%rcx)
+; ENABLE-NEXT:    testb %al, %al
+; ENABLE-NEXT:    jne LBB11_4
+; ENABLE-NEXT:  ## %bb.3: ## %body1
+; ENABLE-NEXT:    ## in Loop: Header=BB11_2 Depth=1
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    nop
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    jmp LBB11_2
+; ENABLE-NEXT:  LBB11_5: ## %if.end
+; ENABLE-NEXT:    leaq -8(%rbp), %rsp
+; ENABLE-NEXT:    popq %rbx
+; ENABLE-NEXT:    popq %rbp
+; ENABLE-NEXT:    retq
+;
+; DISABLE-LABEL: infiniteloop2:
+; DISABLE:       ## %bb.0: ## %entry
+; DISABLE-NEXT:    pushq %rbp
+; DISABLE-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-NEXT:    .cfi_offset %rbp, -16
+; DISABLE-NEXT:    movq %rsp, %rbp
+; DISABLE-NEXT:    .cfi_def_cfa_register %rbp
+; DISABLE-NEXT:    pushq %rbx
+; DISABLE-NEXT:    pushq %rax
+; DISABLE-NEXT:    .cfi_offset %rbx, -24
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    testb %al, %al
+; DISABLE-NEXT:    jne LBB11_5
+; DISABLE-NEXT:  ## %bb.1: ## %if.then
+; DISABLE-NEXT:    movq %rsp, %rcx
+; DISABLE-NEXT:    addq $-16, %rcx
+; DISABLE-NEXT:    movq %rcx, %rsp
+; DISABLE-NEXT:    xorl %edx, %edx
+; DISABLE-NEXT:    jmp LBB11_2
+; DISABLE-NEXT:    .p2align 4, 0x90
+; DISABLE-NEXT:  LBB11_4: ## %body2
+; DISABLE-NEXT:    ## in Loop: Header=BB11_2 Depth=1
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    nop
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    movl $1, %edx
+; DISABLE-NEXT:  LBB11_2: ## %for.body
+; DISABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    movl %edx, %esi
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    movl $1, %edx
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    addl %esi, %edx
+; DISABLE-NEXT:    movl %edx, (%rcx)
+; DISABLE-NEXT:    testb %al, %al
+; DISABLE-NEXT:    jne LBB11_4
+; DISABLE-NEXT:  ## %bb.3: ## %body1
+; DISABLE-NEXT:    ## in Loop: Header=BB11_2 Depth=1
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    nop
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    jmp LBB11_2
+; DISABLE-NEXT:  LBB11_5: ## %if.end
+; DISABLE-NEXT:    leaq -8(%rbp), %rsp
+; DISABLE-NEXT:    popq %rbx
+; DISABLE-NEXT:    popq %rbp
+; DISABLE-NEXT:    retq
 entry:
   br i1 undef, label %if.then, label %if.end
 
@@ -691,9 +1012,70 @@ if.end:
 }
 
 ; Another infinite loop test this time with two nested infinite loop.
-; CHECK-LABEL: infiniteloop3
-; CHECK: retq
 define void @infiniteloop3() {
+; ENABLE-LABEL: infiniteloop3:
+; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    testb %al, %al
+; ENABLE-NEXT:    jne LBB12_2
+; ENABLE-NEXT:  ## %bb.1: ## %body
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    testb %al, %al
+; ENABLE-NEXT:    jne LBB12_7
+; ENABLE-NEXT:  LBB12_2: ## %loop2a.preheader
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    xorl %ecx, %ecx
+; ENABLE-NEXT:    movq %rax, %rsi
+; ENABLE-NEXT:    jmp LBB12_4
+; ENABLE-NEXT:    .p2align 4, 0x90
+; ENABLE-NEXT:  LBB12_3: ## %loop2b
+; ENABLE-NEXT:    ## in Loop: Header=BB12_4 Depth=1
+; ENABLE-NEXT:    movq %rdx, (%rsi)
+; ENABLE-NEXT:    movq %rdx, %rsi
+; ENABLE-NEXT:  LBB12_4: ## %loop1
+; ENABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    movq %rcx, %rdx
+; ENABLE-NEXT:    testq %rax, %rax
+; ENABLE-NEXT:    movq (%rax), %rcx
+; ENABLE-NEXT:    jne LBB12_3
+; ENABLE-NEXT:  ## %bb.5: ## in Loop: Header=BB12_4 Depth=1
+; ENABLE-NEXT:    movq %rdx, %rax
+; ENABLE-NEXT:    movq %rdx, %rsi
+; ENABLE-NEXT:    jmp LBB12_4
+; ENABLE-NEXT:  LBB12_7: ## %end
+; ENABLE-NEXT:    retq
+;
+; DISABLE-LABEL: infiniteloop3:
+; DISABLE:       ## %bb.0: ## %entry
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    testb %al, %al
+; DISABLE-NEXT:    jne LBB12_2
+; DISABLE-NEXT:  ## %bb.1: ## %body
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    testb %al, %al
+; DISABLE-NEXT:    jne LBB12_7
+; DISABLE-NEXT:  LBB12_2: ## %loop2a.preheader
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    xorl %ecx, %ecx
+; DISABLE-NEXT:    movq %rax, %rsi
+; DISABLE-NEXT:    jmp LBB12_4
+; DISABLE-NEXT:    .p2align 4, 0x90
+; DISABLE-NEXT:  LBB12_3: ## %loop2b
+; DISABLE-NEXT:    ## in Loop: Header=BB12_4 Depth=1
+; DISABLE-NEXT:    movq %rdx, (%rsi)
+; DISABLE-NEXT:    movq %rdx, %rsi
+; DISABLE-NEXT:  LBB12_4: ## %loop1
+; DISABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    movq %rcx, %rdx
+; DISABLE-NEXT:    testq %rax, %rax
+; DISABLE-NEXT:    movq (%rax), %rcx
+; DISABLE-NEXT:    jne LBB12_3
+; DISABLE-NEXT:  ## %bb.5: ## in Loop: Header=BB12_4 Depth=1
+; DISABLE-NEXT:    movq %rdx, %rax
+; DISABLE-NEXT:    movq %rdx, %rsi
+; DISABLE-NEXT:    jmp LBB12_4
+; DISABLE-NEXT:  LBB12_7: ## %end
+; DISABLE-NEXT:    retq
 entry:
   br i1 undef, label %loop2a, label %body
 
@@ -724,44 +1106,49 @@ end:
 
 ; Check that we just don't bail out on RegMask.
 ; In this case, the RegMask does not touch a CSR so we are good to go!
-; CHECK-LABEL: regmask:
-;
-; Compare the arguments and jump to exit.
-; No prologue needed.
-; ENABLE: cmpl %esi, %edi
-; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
-;
-; Prologue code.
-; (What we push does not matter. It should be some random sratch register.)
-; CHECK: pushq
-;
-; Compare the arguments and jump to exit.
-; After the prologue is set.
-; DISABLE: cmpl %esi, %edi
-; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
-;
-; CHECK: nop
-; Set the first argument to zero.
-; CHECK: xorl %edi, %edi
-; Set the second argument to addr.
-; CHECK-NEXT: movq %rdx, %rsi
-; CHECK-NEXT: callq _doSomething
-; CHECK-NEXT: popq
-; CHECK-NEXT: retq
-;
-; CHECK: [[EXIT_LABEL]]:
-; Set the first argument to 6.
-; CHECK-NEXT: movl $6, %edi
-; Set the second argument to addr.
-; CHECK-NEXT: movq %rdx, %rsi
-;
-; Without shrink-wrapping, we need to restore the stack before
-; making the tail call.
-; Epilogue code.
-; DISABLE-NEXT: popq
-;
-; CHECK-NEXT: jmp _doSomething
 define i32 @regmask(i32 %a, i32 %b, i32* %addr) {
+; ENABLE-LABEL: regmask:
+; ENABLE:       ## %bb.0:
+; ENABLE-NEXT:    cmpl %esi, %edi
+; ENABLE-NEXT:    jge LBB13_2
+; ENABLE-NEXT:  ## %bb.1: ## %true
+; ENABLE-NEXT:    pushq %rbx
+; ENABLE-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-NEXT:    .cfi_offset %rbx, -16
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    nop
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    xorl %edi, %edi
+; ENABLE-NEXT:    movq %rdx, %rsi
+; ENABLE-NEXT:    callq _doSomething
+; ENABLE-NEXT:    popq %rbx
+; ENABLE-NEXT:    retq
+; ENABLE-NEXT:  LBB13_2: ## %false
+; ENABLE-NEXT:    movl $6, %edi
+; ENABLE-NEXT:    movq %rdx, %rsi
+; ENABLE-NEXT:    jmp _doSomething ## TAILCALL
+;
+; DISABLE-LABEL: regmask:
+; DISABLE:       ## %bb.0:
+; DISABLE-NEXT:    pushq %rbx
+; DISABLE-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-NEXT:    .cfi_offset %rbx, -16
+; DISABLE-NEXT:    cmpl %esi, %edi
+; DISABLE-NEXT:    jge LBB13_2
+; DISABLE-NEXT:  ## %bb.1: ## %true
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    nop
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    xorl %edi, %edi
+; DISABLE-NEXT:    movq %rdx, %rsi
+; DISABLE-NEXT:    callq _doSomething
+; DISABLE-NEXT:    popq %rbx
+; DISABLE-NEXT:    retq
+; DISABLE-NEXT:  LBB13_2: ## %false
+; DISABLE-NEXT:    movl $6, %edi
+; DISABLE-NEXT:    movq %rdx, %rsi
+; DISABLE-NEXT:    popq %rbx
+; DISABLE-NEXT:    jmp _doSomething ## TAILCALL
   %tmp2 = icmp slt i32 %a, %b
   br i1 %tmp2, label %true, label %false
 
@@ -791,41 +1178,76 @@ end:
 ; Note: The registers may change in the following patterns, but
 ; because they imply register hierarchy (e.g., eax, al) this is
 ; tricky to write robust patterns.
-;
-; CHECK-LABEL: useLEAForPrologue:
-;
-; Prologue is at the beginning of the function when shrink-wrapping
-; is disabled.
-; DISABLE: pushq
-; The stack adjustment can use SUB instr because we do not need to
-; preserve the EFLAGS at this point.
-; DISABLE-NEXT: subq $16, %rsp
-;
-; Load the value of b.
-; Create the zero value for the select assignment.
-; CHECK: xorl [[CMOVE_VAL:%eax]], [[CMOVE_VAL]]
-; CHECK-NEXT: cmpb $0, _b(%rip)
-; CHECK-NEXT: movl $48, [[IMM_VAL:%ecx]]
-; CHECK-NEXT: cmovnel [[CMOVE_VAL]], [[IMM_VAL]]
-; CHECK-NEXT: movb %cl, _c(%rip)
-; CHECK-NEXT: je [[VARFUNC_CALL:LBB[0-9_]+]]
-;
-; The code of the loop is not interesting.
-; [...]
-;
-; CHECK: [[VARFUNC_CALL]]:
-; Set the null parameter.
-; CHECK-NEXT: xorl %edi, %edi
-; CHECK-NEXT: callq _varfunc
-;
-; Set the return value.
-; CHECK-NEXT: xorl %eax, %eax
-;
-; Epilogue code.
-; CHECK-NEXT: addq $16, %rsp
-; CHECK-NEXT: popq
-; CHECK-NEXT: retq
 define i32 @useLEAForPrologue(i32 %d, i32 %a, i8 %c) #3 {
+; ENABLE-LABEL: useLEAForPrologue:
+; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    pushq %rbx
+; ENABLE-NEXT:    subq $16, %rsp
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    cmpb $0, {{.*}}(%rip)
+; ENABLE-NEXT:    movl $48, %ecx
+; ENABLE-NEXT:    cmovnel %eax, %ecx
+; ENABLE-NEXT:    movb %cl, {{.*}}(%rip)
+; ENABLE-NEXT:    je LBB14_4
+; ENABLE-NEXT:  ## %bb.1: ## %for.body.lr.ph
+; ENABLE-NEXT:    ## InlineAsm Start
+; ENABLE-NEXT:    nop
+; ENABLE-NEXT:    ## InlineAsm End
+; ENABLE-NEXT:    .p2align 4, 0x90
+; ENABLE-NEXT:  LBB14_2: ## %for.body
+; ENABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    cmpl %esi, %edi
+; ENABLE-NEXT:    setl %al
+; ENABLE-NEXT:    xorl %esi, %esi
+; ENABLE-NEXT:    movb %al, %sil
+; ENABLE-NEXT:    incb %dl
+; ENABLE-NEXT:    cmpb $45, %dl
+; ENABLE-NEXT:    jl LBB14_2
+; ENABLE-NEXT:  ## %bb.3: ## %for.cond.for.end_crit_edge
+; ENABLE-NEXT:    movq _a@{{.*}}(%rip), %rax
+; ENABLE-NEXT:    movl %esi, (%rax)
+; ENABLE-NEXT:  LBB14_4: ## %for.end
+; ENABLE-NEXT:    xorl %edi, %edi
+; ENABLE-NEXT:    callq _varfunc
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    addq $16, %rsp
+; ENABLE-NEXT:    popq %rbx
+; ENABLE-NEXT:    retq
+;
+; DISABLE-LABEL: useLEAForPrologue:
+; DISABLE:       ## %bb.0: ## %entry
+; DISABLE-NEXT:    pushq %rbx
+; DISABLE-NEXT:    subq $16, %rsp
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    cmpb $0, {{.*}}(%rip)
+; DISABLE-NEXT:    movl $48, %ecx
+; DISABLE-NEXT:    cmovnel %eax, %ecx
+; DISABLE-NEXT:    movb %cl, {{.*}}(%rip)
+; DISABLE-NEXT:    je LBB14_4
+; DISABLE-NEXT:  ## %bb.1: ## %for.body.lr.ph
+; DISABLE-NEXT:    ## InlineAsm Start
+; DISABLE-NEXT:    nop
+; DISABLE-NEXT:    ## InlineAsm End
+; DISABLE-NEXT:    .p2align 4, 0x90
+; DISABLE-NEXT:  LBB14_2: ## %for.body
+; DISABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    cmpl %esi, %edi
+; DISABLE-NEXT:    setl %al
+; DISABLE-NEXT:    xorl %esi, %esi
+; DISABLE-NEXT:    movb %al, %sil
+; DISABLE-NEXT:    incb %dl
+; DISABLE-NEXT:    cmpb $45, %dl
+; DISABLE-NEXT:    jl LBB14_2
+; DISABLE-NEXT:  ## %bb.3: ## %for.cond.for.end_crit_edge
+; DISABLE-NEXT:    movq _a@{{.*}}(%rip), %rax
+; DISABLE-NEXT:    movl %esi, (%rax)
+; DISABLE-NEXT:  LBB14_4: ## %for.end
+; DISABLE-NEXT:    xorl %edi, %edi
+; DISABLE-NEXT:    callq _varfunc
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    addq $16, %rsp
+; DISABLE-NEXT:    popq %rbx
+; DISABLE-NEXT:    retq
 entry:
   %tmp = alloca i3
   %.b = load i1, i1* @b, align 1
@@ -866,24 +1288,48 @@ declare i32 @varfunc(i8* nocapture reado
 ; TLS calls used to be wrongly model and shrink-wrapping would have inserted
 ; the prologue and epilogue just around the call to doSomething.
 ; PR25820.
-;
-; CHECK-LABEL: tlsCall:
-; CHECK: pushq
-; CHECK: testb $1, %dil
-; CHECK: je [[ELSE_LABEL:LBB[0-9_]+]]
-;
-; master bb
-; CHECK: movq _sum1@TLVP(%rip), %rdi
-; CHECK-NEXT: callq *(%rdi)
-; CHECK: jmp [[EXIT_LABEL:LBB[0-9_]+]]
-;
-; [[ELSE_LABEL]]:
-; CHECK: callq _doSomething
-;
-; [[EXIT_LABEL]]:
-; CHECK: popq
-; CHECK-NEXT: retq
 define i32 @tlsCall(i1 %bool1, i32 %arg, i32* readonly dereferenceable(4) %sum1) #3 {
+; ENABLE-LABEL: tlsCall:
+; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    pushq %rax
+; ENABLE-NEXT:    testb $1, %dil
+; ENABLE-NEXT:    je LBB15_2
+; ENABLE-NEXT:  ## %bb.1: ## %master
+; ENABLE-NEXT:    movl (%rdx), %ecx
+; ENABLE-NEXT:    movq _sum1@{{.*}}(%rip), %rdi
+; ENABLE-NEXT:    callq *(%rdi)
+; ENABLE-NEXT:    movl %ecx, (%rax)
+; ENABLE-NEXT:    jmp LBB15_3
+; ENABLE-NEXT:  LBB15_2: ## %else
+; ENABLE-NEXT:    xorl %edi, %edi
+; ENABLE-NEXT:    xorl %esi, %esi
+; ENABLE-NEXT:    callq _doSomething
+; ENABLE-NEXT:    movl %eax, %esi
+; ENABLE-NEXT:  LBB15_3: ## %exit
+; ENABLE-NEXT:    movl %esi, %eax
+; ENABLE-NEXT:    popq %rcx
+; ENABLE-NEXT:    retq
+;
+; DISABLE-LABEL: tlsCall:
+; DISABLE:       ## %bb.0: ## %entry
+; DISABLE-NEXT:    pushq %rax
+; DISABLE-NEXT:    testb $1, %dil
+; DISABLE-NEXT:    je LBB15_2
+; DISABLE-NEXT:  ## %bb.1: ## %master
+; DISABLE-NEXT:    movl (%rdx), %ecx
+; DISABLE-NEXT:    movq _sum1@{{.*}}(%rip), %rdi
+; DISABLE-NEXT:    callq *(%rdi)
+; DISABLE-NEXT:    movl %ecx, (%rax)
+; DISABLE-NEXT:    jmp LBB15_3
+; DISABLE-NEXT:  LBB15_2: ## %else
+; DISABLE-NEXT:    xorl %edi, %edi
+; DISABLE-NEXT:    xorl %esi, %esi
+; DISABLE-NEXT:    callq _doSomething
+; DISABLE-NEXT:    movl %eax, %esi
+; DISABLE-NEXT:  LBB15_3: ## %exit
+; DISABLE-NEXT:    movl %esi, %eax
+; DISABLE-NEXT:    popq %rcx
+; DISABLE-NEXT:    retq
 entry:
   br i1 %bool1, label %master, label %else
 
@@ -913,17 +1359,6 @@ attributes #3 = { nounwind }
 ; and since we use that information to do the placement, we may end up
 ; inserting the prologue/epilogue at incorrect places.
 ; PR25988.
-;
-; CHECK-LABEL: irreducibleCFG:
-; CHECK: %entry
-; Make sure the prologue happens in the entry block.
-; CHECK-NEXT: pushq
-; ...
-; Make sure the epilogue happens in the exit block.
-; CHECK-NOT: popq
-; CHECK: popq
-; CHECK-NEXT: popq
-; CHECK-NEXT: retq
 ; Make sure we emit missed optimization remarks for this.
 ; REMARKS: Pass:            shrink-wrap
 ; REMARKS-NEXT: Name:            UnsupportedIrreducibleCFG
@@ -932,6 +1367,93 @@ attributes #3 = { nounwind }
 ; REMARKS-NEXT:   - String:          Irreducible CFGs are not supported yet
 
 define i32 @irreducibleCFG() #4 {
+; ENABLE-LABEL: irreducibleCFG:
+; ENABLE:       ## %bb.0: ## %entry
+; ENABLE-NEXT:    pushq %rbp
+; ENABLE-NEXT:    .cfi_def_cfa_offset 16
+; ENABLE-NEXT:    .cfi_offset %rbp, -16
+; ENABLE-NEXT:    movq %rsp, %rbp
+; ENABLE-NEXT:    .cfi_def_cfa_register %rbp
+; ENABLE-NEXT:    pushq %rbx
+; ENABLE-NEXT:    pushq %rax
+; ENABLE-NEXT:    .cfi_offset %rbx, -24
+; ENABLE-NEXT:    movq _irreducibleCFGf@{{.*}}(%rip), %rax
+; ENABLE-NEXT:    cmpb $0, (%rax)
+; ENABLE-NEXT:    je LBB16_2
+; ENABLE-NEXT:    .p2align 4, 0x90
+; ENABLE-NEXT:  LBB16_1: ## %preheader
+; ENABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    jmp LBB16_1
+; ENABLE-NEXT:  LBB16_2: ## %split
+; ENABLE-NEXT:    movq _irreducibleCFGb@{{.*}}(%rip), %rax
+; ENABLE-NEXT:    movl (%rax), %eax
+; ENABLE-NEXT:    testl %eax, %eax
+; ENABLE-NEXT:    je LBB16_3
+; ENABLE-NEXT:  ## %bb.4: ## %for.body4.i
+; ENABLE-NEXT:    movq _irreducibleCFGa@{{.*}}(%rip), %rax
+; ENABLE-NEXT:    movl (%rax), %edi
+; ENABLE-NEXT:    xorl %ebx, %ebx
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    callq _something
+; ENABLE-NEXT:    jmp LBB16_5
+; ENABLE-NEXT:  LBB16_3:
+; ENABLE-NEXT:    xorl %ebx, %ebx
+; ENABLE-NEXT:    .p2align 4, 0x90
+; ENABLE-NEXT:  LBB16_5: ## %for.inc
+; ENABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    incl %ebx
+; ENABLE-NEXT:    cmpl $7, %ebx
+; ENABLE-NEXT:    jl LBB16_5
+; ENABLE-NEXT:  ## %bb.6: ## %fn1.exit
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    addq $8, %rsp
+; ENABLE-NEXT:    popq %rbx
+; ENABLE-NEXT:    popq %rbp
+; ENABLE-NEXT:    retq
+;
+; DISABLE-LABEL: irreducibleCFG:
+; DISABLE:       ## %bb.0: ## %entry
+; DISABLE-NEXT:    pushq %rbp
+; DISABLE-NEXT:    .cfi_def_cfa_offset 16
+; DISABLE-NEXT:    .cfi_offset %rbp, -16
+; DISABLE-NEXT:    movq %rsp, %rbp
+; DISABLE-NEXT:    .cfi_def_cfa_register %rbp
+; DISABLE-NEXT:    pushq %rbx
+; DISABLE-NEXT:    pushq %rax
+; DISABLE-NEXT:    .cfi_offset %rbx, -24
+; DISABLE-NEXT:    movq _irreducibleCFGf@{{.*}}(%rip), %rax
+; DISABLE-NEXT:    cmpb $0, (%rax)
+; DISABLE-NEXT:    je LBB16_2
+; DISABLE-NEXT:    .p2align 4, 0x90
+; DISABLE-NEXT:  LBB16_1: ## %preheader
+; DISABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    jmp LBB16_1
+; DISABLE-NEXT:  LBB16_2: ## %split
+; DISABLE-NEXT:    movq _irreducibleCFGb@{{.*}}(%rip), %rax
+; DISABLE-NEXT:    movl (%rax), %eax
+; DISABLE-NEXT:    testl %eax, %eax
+; DISABLE-NEXT:    je LBB16_3
+; DISABLE-NEXT:  ## %bb.4: ## %for.body4.i
+; DISABLE-NEXT:    movq _irreducibleCFGa@{{.*}}(%rip), %rax
+; DISABLE-NEXT:    movl (%rax), %edi
+; DISABLE-NEXT:    xorl %ebx, %ebx
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    callq _something
+; DISABLE-NEXT:    jmp LBB16_5
+; DISABLE-NEXT:  LBB16_3:
+; DISABLE-NEXT:    xorl %ebx, %ebx
+; DISABLE-NEXT:    .p2align 4, 0x90
+; DISABLE-NEXT:  LBB16_5: ## %for.inc
+; DISABLE-NEXT:    ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    incl %ebx
+; DISABLE-NEXT:    cmpl $7, %ebx
+; DISABLE-NEXT:    jl LBB16_5
+; DISABLE-NEXT:  ## %bb.6: ## %fn1.exit
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    addq $8, %rsp
+; DISABLE-NEXT:    popq %rbx
+; DISABLE-NEXT:    popq %rbp
+; DISABLE-NEXT:    retq
 entry:
   %i0 = load i32, i32* @irreducibleCFGa, align 4
   %.pr = load i8, i8* @irreducibleCFGf, align 1
@@ -978,20 +1500,60 @@ attributes #4 = { "no-frame-pointer-elim
 ; loop, which can occur into a misplacement of the restore block, if we're
 ; looking for the nearest common post-dominator of an "unreachable" block.
 
-; CHECK-LABEL: infiniteLoopNoSuccessor:
-; CHECK: ## %bb.0:
-; Make sure the prologue happens in the entry block.
-; CHECK-NEXT: pushq %rbp
-; ...
-; Make sure we don't shrink-wrap.
-; CHECK: ## %bb.1
-; CHECK-NOT: pushq %rbp
-; ...
-; Make sure the epilogue happens in the exit block.
-; CHECK: ## %bb.5
-; CHECK: popq %rbp
-; CHECK-NEXT: retq
 define void @infiniteLoopNoSuccessor() #5 {
+; ENABLE-LABEL: infiniteLoopNoSuccessor:
+; ENABLE:       ## %bb.0:
+; ENABLE-NEXT:    pushq %rbp
+; ENABLE-NEXT:    movq %rsp, %rbp
+; ENABLE-NEXT:    movq _x@{{.*}}(%rip), %rax
+; ENABLE-NEXT:    cmpl $0, (%rax)
+; ENABLE-NEXT:    je LBB17_2
+; ENABLE-NEXT:  ## %bb.1:
+; ENABLE-NEXT:    movl $0, (%rax)
+; ENABLE-NEXT:  LBB17_2:
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    callq _somethingElse
+; ENABLE-NEXT:    movq _y@{{.*}}(%rip), %rax
+; ENABLE-NEXT:    cmpl $0, (%rax)
+; ENABLE-NEXT:    je LBB17_3
+; ENABLE-NEXT:  ## %bb.5:
+; ENABLE-NEXT:    popq %rbp
+; ENABLE-NEXT:    retq
+; ENABLE-NEXT:  LBB17_3:
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    callq _something
+; ENABLE-NEXT:    .p2align 4, 0x90
+; ENABLE-NEXT:  LBB17_4: ## =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT:    xorl %eax, %eax
+; ENABLE-NEXT:    callq _somethingElse
+; ENABLE-NEXT:    jmp LBB17_4
+;
+; DISABLE-LABEL: infiniteLoopNoSuccessor:
+; DISABLE:       ## %bb.0:
+; DISABLE-NEXT:    pushq %rbp
+; DISABLE-NEXT:    movq %rsp, %rbp
+; DISABLE-NEXT:    movq _x@{{.*}}(%rip), %rax
+; DISABLE-NEXT:    cmpl $0, (%rax)
+; DISABLE-NEXT:    je LBB17_2
+; DISABLE-NEXT:  ## %bb.1:
+; DISABLE-NEXT:    movl $0, (%rax)
+; DISABLE-NEXT:  LBB17_2:
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    callq _somethingElse
+; DISABLE-NEXT:    movq _y@{{.*}}(%rip), %rax
+; DISABLE-NEXT:    cmpl $0, (%rax)
+; DISABLE-NEXT:    je LBB17_3
+; DISABLE-NEXT:  ## %bb.5:
+; DISABLE-NEXT:    popq %rbp
+; DISABLE-NEXT:    retq
+; DISABLE-NEXT:  LBB17_3:
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    callq _something
+; DISABLE-NEXT:    .p2align 4, 0x90
+; DISABLE-NEXT:  LBB17_4: ## =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT:    xorl %eax, %eax
+; DISABLE-NEXT:    callq _somethingElse
+; DISABLE-NEXT:    jmp LBB17_4
   %1 = load i32, i32* @x, align 4
   %2 = icmp ne i32 %1, 0
   br i1 %2, label %3, label %4

Modified: llvm/trunk/test/CodeGen/X86/xchg-nofold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xchg-nofold.ll?rev=363265&r1=363264&r2=363265&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xchg-nofold.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xchg-nofold.ll Thu Jun 13 06:56:19 2019
@@ -9,6 +9,7 @@
 define zeroext i1 @_Z3fooRSt6atomicIbEb(%"struct.std::atomic"* nocapture dereferenceable(1) %a, i1 returned zeroext %b) nounwind {
 ; CHECK-LABEL: _Z3fooRSt6atomicIbEb:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    movq %rdi, %rcx
 ; CHECK-NEXT:    shrq $3, %rcx
@@ -24,9 +25,9 @@ define zeroext i1 @_Z3fooRSt6atomicIbEb(
 ; CHECK-NEXT:    movl %eax, %ecx
 ; CHECK-NEXT:    xchgb %cl, (%rdi)
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    popq %rcx
 ; CHECK-NEXT:    retq
 ; CHECK-NEXT:  .LBB0_2:
-; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    callq __asan_report_store1
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP




More information about the llvm-commits mailing list