[llvm] 4e2b4f9 - [ShrinkWrap] Use underlying object to rule out stack access.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Wed May 3 01:28:59 PDT 2023


Author: Florian Hahn
Date: 2023-05-03T09:28:07+01:00
New Revision: 4e2b4f97a09500fb6ceb4f077c492fac056a6a0a

URL: https://github.com/llvm/llvm-project/commit/4e2b4f97a09500fb6ceb4f077c492fac056a6a0a
DIFF: https://github.com/llvm/llvm-project/commit/4e2b4f97a09500fb6ceb4f077c492fac056a6a0a.diff

LOG: [ShrinkWrap] Use underlying object to rule out stack access.

Allow shrink-wrapping past memory accesses that only access globals or
function arguments. This patch uses getUnderlyingObject to try to
identify the object accessed by a given memory operand. If that object
is a global or an argument, the access does not touch the stack of the
current function and should not block shrink wrapping.

Note that the caller's stack may get accessed when passing an argument
via the stack, but not the stack of the current function.

This addresses part of the TODO from D63152.

Reviewed By: thegameg

Differential Revision: https://reviews.llvm.org/D149668

Added: 
    

Modified: 
    llvm/lib/CodeGen/ShrinkWrap.cpp
    llvm/test/CodeGen/AArch64/addsub.ll
    llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
    llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
    llvm/test/CodeGen/AArch64/csr-split.ll
    llvm/test/CodeGen/AArch64/shrink-wrap-byval-inalloca-preallocated.ll
    llvm/test/CodeGen/AArch64/taildup-cfi.ll
    llvm/test/CodeGen/PowerPC/branch-on-store-cond.ll
    llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
    llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
    llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
    llvm/test/CodeGen/RISCV/pr51206.ll
    llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
    llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
    llvm/test/CodeGen/X86/cmov.ll
    llvm/test/CodeGen/X86/copy-eflags.ll
    llvm/test/CodeGen/X86/csr-split.ll
    llvm/test/CodeGen/X86/i686-win-shrink-wrapping.ll
    llvm/test/CodeGen/X86/inline-asm-flag-output.ll
    llvm/test/CodeGen/X86/peep-test-5.ll
    llvm/test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll
    llvm/test/CodeGen/X86/statepoint-vector.ll
    llvm/test/DebugInfo/X86/merge_inlined_loc.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index 292a37b8264cb..b219b83bbc2fe 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -53,6 +53,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -259,13 +260,30 @@ INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
 
 bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
                                  RegScavenger *RS) const {
+  /// Check if \p Op is known to access an address not on the function's stack .
+  /// At the moment, accesses where the underlying object is a global or a
+  /// function argument are considered non-stack accesses. Note that the
+  /// caller's stack may get accessed when passing an argument via the stack,
+  /// but not the stack of the current function.
+  ///
+  auto IsKnownNonStackPtr = [](MachineMemOperand *Op) {
+    if (Op->getValue()) {
+      const Value *UO = getUnderlyingObject(Op->getValue());
+      if (!UO)
+        return false;
+      if (auto *Arg = dyn_cast<Argument>(UO))
+        return !Arg->hasPassPointeeByValueCopyAttr();
+      return isa<GlobalValue>(UO);
+    }
+    return false;
+  };
   // This prevents premature stack popping when occurs a indirect stack
-  // access. It is overly aggressive for the moment.
-  // TODO: - Obvious non-stack loads and store, such as global values,
-  //         are known to not access the stack.
+  // access.  It is overly aggressive for the moment.
+  // TODO:
   //       - Further, data dependency and alias analysis can validate
   //         that load and stores never derive from the stack pointer.
-  if (MI.mayLoadOrStore())
+  if (MI.mayLoadOrStore() && (MI.isCall() || MI.hasUnmodeledSideEffects() ||
+                              !all_of(MI.memoperands(), IsKnownNonStackPtr)))
     return true;
 
   if (MI.getOpcode() == FrameSetupOpcode ||

diff  --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
index 3848a3304c7dc..430462fadefa3 100644
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -651,9 +651,6 @@ declare dso_local i32 @crng_reseed(...) local_unnamed_addr
 define dso_local i32 @_extract_crng_crng() {
 ; CHECK-LABEL: _extract_crng_crng:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    adrp x8, _extract_crng_crng
 ; CHECK-NEXT:    add x8, x8, :lo12:_extract_crng_crng
 ; CHECK-NEXT:    tbnz x8, #63, .LBB36_2
@@ -665,6 +662,9 @@ define dso_local i32 @_extract_crng_crng() {
 ; CHECK-NEXT:    cmn x8, #1272
 ; CHECK-NEXT:    b.pl .LBB36_3
 ; CHECK-NEXT:  .LBB36_2: // %if.then
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    adrp x8, primary_crng
 ; CHECK-NEXT:    adrp x9, input_pool
 ; CHECK-NEXT:    add x9, x9, :lo12:input_pool
@@ -672,8 +672,10 @@ define dso_local i32 @_extract_crng_crng() {
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    csel x0, xzr, x9, eq
 ; CHECK-NEXT:    bl crng_reseed
-; CHECK-NEXT:  .LBB36_3: // %if.end
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:  .LBB36_3: // %if.end
+; CHECK-NEXT:    .cfi_def_cfa wsp, 0
+; CHECK-NEXT:    .cfi_same_value w30
 ; CHECK-NEXT:    ret
 entry:
   br i1 icmp slt (ptr @_extract_crng_crng, ptr null), label %if.then, label %lor.lhs.false

diff  --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
index 79d3291b2fa97..c47f9926e936b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
@@ -1004,25 +1004,28 @@ end:
 define i32 @stack_realign(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2) {
 ; ENABLE-LABEL: stack_realign:
 ; ENABLE:       ; %bb.0:
+; ENABLE-NEXT:    lsl w8, w0, w1
+; ENABLE-NEXT:    lsl w9, w1, w0
+; ENABLE-NEXT:    cmp w0, w1
+; ENABLE-NEXT:    b.ge LBB13_2
+; ENABLE-NEXT:  ; %bb.1: ; %true
 ; ENABLE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; ENABLE-NEXT:    .cfi_def_cfa_offset 16
 ; ENABLE-NEXT:    mov x29, sp
 ; ENABLE-NEXT:    .cfi_def_cfa w29, 16
 ; ENABLE-NEXT:    .cfi_offset w30, -8
 ; ENABLE-NEXT:    .cfi_offset w29, -16
-; ENABLE-NEXT:    sub x9, sp, #16
-; ENABLE-NEXT:    and sp, x9, #0xffffffffffffffe0
-; ENABLE-NEXT:    lsl w8, w0, w1
-; ENABLE-NEXT:    lsl w9, w1, w0
-; ENABLE-NEXT:    cmp w0, w1
-; ENABLE-NEXT:    b.ge LBB13_2
-; ENABLE-NEXT:  ; %bb.1: ; %true
+; ENABLE-NEXT:    sub x1, sp, #16
+; ENABLE-NEXT:    and sp, x1, #0xffffffffffffffe0
 ; ENABLE-NEXT:    str w0, [sp]
+; ENABLE-NEXT:    mov sp, x29
+; ENABLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; ENABLE-NEXT:  LBB13_2: ; %false
+; ENABLE-NEXT:    .cfi_def_cfa wsp, 0
+; ENABLE-NEXT:    .cfi_same_value w30
+; ENABLE-NEXT:    .cfi_same_value w29
 ; ENABLE-NEXT:    str w8, [x2]
 ; ENABLE-NEXT:    str w9, [x3]
-; ENABLE-NEXT:    mov sp, x29
-; ENABLE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; ENABLE-NEXT:    ret
 ;
 ; DISABLE-LABEL: stack_realign:

diff  --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
index 4a4f0571fb4fc..101e5696c70f9 100644
--- a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
+++ b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -658,28 +658,28 @@ return:                                           ; preds = %if.end, %land.lhs.t
 define i32 @fcmpri(i32 %argc, ptr nocapture readonly %argv) #0 {
 ; CHECK-LABEL: fcmpri:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    str d8, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    .cfi_offset b8, -32
 ; CHECK-NEXT:    cmp w0, #2
 ; CHECK-NEXT:    b.lt .LBB9_3
 ; CHECK-NEXT:  // %bb.1: // %land.lhs.true
 ; CHECK-NEXT:    ldr x8, [x1, #8]
 ; CHECK-NEXT:    cbz x8, .LBB9_3
 ; CHECK-NEXT:  // %bb.2:
-; CHECK-NEXT:    mov w0, #3
-; CHECK-NEXT:    b .LBB9_4
+; CHECK-NEXT:    mov w0, #3 // =0x3
+; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB9_3: // %if.end
-; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:    str d8, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    .cfi_offset b8, -32
+; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    bl zoo
 ; CHECK-NEXT:    mov w19, w0
-; CHECK-NEXT:    mov w0, #-1
+; CHECK-NEXT:    mov w0, #-1 // =0xffffffff
 ; CHECK-NEXT:    bl yoo
 ; CHECK-NEXT:    cmp w19, #0
-; CHECK-NEXT:    mov w1, #2
+; CHECK-NEXT:    mov w1, #2 // =0x2
 ; CHECK-NEXT:    cinc w0, w19, gt
 ; CHECK-NEXT:    fmov d8, d0
 ; CHECK-NEXT:    bl xoo
@@ -689,9 +689,8 @@ define i32 @fcmpri(i32 %argc, ptr nocapture readonly %argv) #0 {
 ; CHECK-NEXT:    fadd d0, d8, d0
 ; CHECK-NEXT:    fcsel d0, d8, d0, gt
 ; CHECK-NEXT:    bl woo
-; CHECK-NEXT:    mov w0, #4
-; CHECK-NEXT:  .LBB9_4: // %return
 ; CHECK-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    mov w0, #4 // =0x4
 ; CHECK-NEXT:    ldr d8, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    .cfi_restore w19

diff  --git a/llvm/test/CodeGen/AArch64/csr-split.ll b/llvm/test/CodeGen/AArch64/csr-split.ll
index 9143ddd545cdf..da73c3a13a2e9 100644
--- a/llvm/test/CodeGen/AArch64/csr-split.ll
+++ b/llvm/test/CodeGen/AArch64/csr-split.ll
@@ -9,23 +9,17 @@
 define dso_local signext i32 @test1(ptr %b) local_unnamed_addr uwtable  {
 ; CHECK-LABEL: test1:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    .cfi_remember_state
 ; CHECK-NEXT:    adrp x8, a
 ; CHECK-NEXT:    ldrsw x8, [x8, :lo12:a]
 ; CHECK-NEXT:    cmp x8, x0
 ; CHECK-NEXT:    b.eq .LBB0_2
 ; CHECK-NEXT:  // %bb.1: // %if.end
-; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NEXT:    .cfi_restore w19
-; CHECK-NEXT:    .cfi_restore w30
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB0_2: // %if.then
-; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    bl callVoid
 ; CHECK-NEXT:    mov x0, x19
@@ -37,14 +31,6 @@ define dso_local signext i32 @test1(ptr %b) local_unnamed_addr uwtable  {
 ;
 ; CHECK-APPLE-LABEL: test1:
 ; CHECK-APPLE:       ; %bb.0: ; %entry
-; CHECK-APPLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
-; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
-; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
-; CHECK-APPLE-NEXT:    .cfi_offset w19, -24
-; CHECK-APPLE-NEXT:    .cfi_offset w20, -32
-; CHECK-APPLE-NEXT:    .cfi_remember_state
 ; CHECK-APPLE-NEXT:  Lloh0:
 ; CHECK-APPLE-NEXT:    adrp x8, _a at PAGE
 ; CHECK-APPLE-NEXT:  Lloh1:
@@ -52,16 +38,15 @@ define dso_local signext i32 @test1(ptr %b) local_unnamed_addr uwtable  {
 ; CHECK-APPLE-NEXT:    cmp x8, x0
 ; CHECK-APPLE-NEXT:    b.eq LBB0_2
 ; CHECK-APPLE-NEXT:  ; %bb.1: ; %if.end
-; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; CHECK-APPLE-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
-; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-APPLE-NEXT:    .cfi_restore w30
-; CHECK-APPLE-NEXT:    .cfi_restore w29
-; CHECK-APPLE-NEXT:    .cfi_restore w19
-; CHECK-APPLE-NEXT:    .cfi_restore w20
 ; CHECK-APPLE-NEXT:    ret
 ; CHECK-APPLE-NEXT:  LBB0_2: ; %if.then
-; CHECK-APPLE-NEXT:    .cfi_restore_state
+; CHECK-APPLE-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-APPLE-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT:    .cfi_offset w30, -8
+; CHECK-APPLE-NEXT:    .cfi_offset w29, -16
+; CHECK-APPLE-NEXT:    .cfi_offset w19, -24
+; CHECK-APPLE-NEXT:    .cfi_offset w20, -32
 ; CHECK-APPLE-NEXT:    mov x19, x0
 ; CHECK-APPLE-NEXT:    bl _callVoid
 ; CHECK-APPLE-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload

diff  --git a/llvm/test/CodeGen/AArch64/shrink-wrap-byval-inalloca-preallocated.ll b/llvm/test/CodeGen/AArch64/shrink-wrap-byval-inalloca-preallocated.ll
index 16dc402b43021..23f28b1e9ce1a 100644
--- a/llvm/test/CodeGen/AArch64/shrink-wrap-byval-inalloca-preallocated.ll
+++ b/llvm/test/CodeGen/AArch64/shrink-wrap-byval-inalloca-preallocated.ll
@@ -12,13 +12,6 @@ declare void @fn(ptr, ptr)
 define void @test_regular_pointers(ptr %a, ptr %b) {
 ; CHECK-LABEL: test_regular_pointers:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_offset w30, -8
-; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    .cfi_offset w19, -24
-; CHECK-NEXT:    .cfi_offset w20, -32
 ; CHECK-NEXT:    ldr d0, [x0]
 ; CHECK-NEXT:    mov x8, #1 ; =0x1
 ; CHECK-NEXT:    ldr d1, [x1, #8]
@@ -30,12 +23,24 @@ define void @test_regular_pointers(ptr %a, ptr %b) {
 ; CHECK-NEXT:    b.mi LBB0_2
 ; CHECK-NEXT:    b.gt LBB0_2
 ; CHECK-NEXT:  ; %bb.1: ; %then
+; CHECK-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w19, -24
+; CHECK-NEXT:    .cfi_offset w20, -32
 ; CHECK-NEXT:    mov x19, x1
 ; CHECK-NEXT:    bl _fn
-; CHECK-NEXT:    str xzr, [x19]
-; CHECK-NEXT:  LBB0_2: ; %exit
 ; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    str xzr, [x19]
 ; CHECK-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; CHECK-NEXT:  LBB0_2: ; %exit
+; CHECK-NEXT:    .cfi_def_cfa wsp, 0
+; CHECK-NEXT:    .cfi_same_value w30
+; CHECK-NEXT:    .cfi_same_value w29
+; CHECK-NEXT:    .cfi_same_value w19
+; CHECK-NEXT:    .cfi_same_value w20
 ; CHECK-NEXT:    ret
 entry:
   %l.a = load double, ptr %a, align 8

diff  --git a/llvm/test/CodeGen/AArch64/taildup-cfi.ll b/llvm/test/CodeGen/AArch64/taildup-cfi.ll
index 23b4891752d2c..221503009cdb6 100644
--- a/llvm/test/CodeGen/AArch64/taildup-cfi.ll
+++ b/llvm/test/CodeGen/AArch64/taildup-cfi.ll
@@ -32,7 +32,7 @@ if.then:                                          ; preds = %entry
   store i32 0, ptr @f, align 4, !tbaa !2
   br label %if.end
 
-; DARWIN:           Merging into block
+; DARWIN-NOT:           Merging into block
 ; LINUX:    	      Merging into block
 
 if.end:                                           ; preds = %entry.if.end_crit_edge, %if.then

diff  --git a/llvm/test/CodeGen/PowerPC/branch-on-store-cond.ll b/llvm/test/CodeGen/PowerPC/branch-on-store-cond.ll
index 2d9a9ab477880..811ed1b1d6054 100644
--- a/llvm/test/CodeGen/PowerPC/branch-on-store-cond.ll
+++ b/llvm/test/CodeGen/PowerPC/branch-on-store-cond.ll
@@ -7,38 +7,38 @@
 define dso_local zeroext i8 @test1(ptr noundef %addr, i8 noundef zeroext %newval) local_unnamed_addr #0 {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stbcx. 4, 0, 3
+; CHECK-NEXT:    bne 0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    mflr 0
 ; CHECK-NEXT:    stdu 1, -32(1)
 ; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    stbcx. 4, 0, 3
-; CHECK-NEXT:    bne 0, .LBB0_2
-; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    bl dummy
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:  .LBB0_2: # %if.end
-; CHECK-NEXT:    li 3, 55
 ; CHECK-NEXT:    addi 1, 1, 32
 ; CHECK-NEXT:    ld 0, 16(1)
 ; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:  .LBB0_2: # %if.end
+; CHECK-NEXT:    li 3, 55
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-AIX-LABEL: test1:
 ; CHECK-AIX:       # %bb.0: # %entry
-; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stdu 1, -112(1)
-; CHECK-AIX-NEXT:    std 0, 128(1)
 ; CHECK-AIX-NEXT:    stbcx. 4, 0, 3
 ; CHECK-AIX-NEXT:    bne 0, L..BB0_2
 ; CHECK-AIX-NEXT:  # %bb.1: # %if.then
+; CHECK-AIX-NEXT:    mflr 0
+; CHECK-AIX-NEXT:    stdu 1, -112(1)
+; CHECK-AIX-NEXT:    std 0, 128(1)
 ; CHECK-AIX-NEXT:    bl .dummy[PR]
 ; CHECK-AIX-NEXT:    nop
-; CHECK-AIX-NEXT:  L..BB0_2: # %if.end
-; CHECK-AIX-NEXT:    li 3, 55
 ; CHECK-AIX-NEXT:    addi 1, 1, 112
 ; CHECK-AIX-NEXT:    ld 0, 16(1)
 ; CHECK-AIX-NEXT:    mtlr 0
+; CHECK-AIX-NEXT:  L..BB0_2: # %if.end
+; CHECK-AIX-NEXT:    li 3, 55
 ; CHECK-AIX-NEXT:    blr
 entry:
   %conv = zext i8 %newval to i32
@@ -57,38 +57,38 @@ if.end:                                           ; preds = %if.then, %entry
 define dso_local signext i16 @test2(ptr noundef %addr, i16 noundef signext %newval) local_unnamed_addr #0 {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sthcx. 4, 0, 3
+; CHECK-NEXT:    bne 0, .LBB1_2
+; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    mflr 0
 ; CHECK-NEXT:    stdu 1, -32(1)
 ; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    sthcx. 4, 0, 3
-; CHECK-NEXT:    bne 0, .LBB1_2
-; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    bl dummy
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:  .LBB1_2: # %if.end
-; CHECK-NEXT:    li 3, 55
 ; CHECK-NEXT:    addi 1, 1, 32
 ; CHECK-NEXT:    ld 0, 16(1)
 ; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:  .LBB1_2: # %if.end
+; CHECK-NEXT:    li 3, 55
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-AIX-LABEL: test2:
 ; CHECK-AIX:       # %bb.0: # %entry
-; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stdu 1, -112(1)
-; CHECK-AIX-NEXT:    std 0, 128(1)
 ; CHECK-AIX-NEXT:    sthcx. 4, 0, 3
 ; CHECK-AIX-NEXT:    bne 0, L..BB1_2
 ; CHECK-AIX-NEXT:  # %bb.1: # %if.then
+; CHECK-AIX-NEXT:    mflr 0
+; CHECK-AIX-NEXT:    stdu 1, -112(1)
+; CHECK-AIX-NEXT:    std 0, 128(1)
 ; CHECK-AIX-NEXT:    bl .dummy[PR]
 ; CHECK-AIX-NEXT:    nop
-; CHECK-AIX-NEXT:  L..BB1_2: # %if.end
-; CHECK-AIX-NEXT:    li 3, 55
 ; CHECK-AIX-NEXT:    addi 1, 1, 112
 ; CHECK-AIX-NEXT:    ld 0, 16(1)
 ; CHECK-AIX-NEXT:    mtlr 0
+; CHECK-AIX-NEXT:  L..BB1_2: # %if.end
+; CHECK-AIX-NEXT:    li 3, 55
 ; CHECK-AIX-NEXT:    blr
 entry:
   %0 = sext i16 %newval to i32
@@ -107,38 +107,38 @@ if.end:                                           ; preds = %if.then, %entry
 define dso_local signext i32 @test3(ptr noundef %addr, i32 noundef signext %newval) local_unnamed_addr #0 {
 ; CHECK-LABEL: test3:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stwcx. 4, 0, 3
+; CHECK-NEXT:    bne 0, .LBB2_2
+; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    mflr 0
 ; CHECK-NEXT:    stdu 1, -32(1)
 ; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    stwcx. 4, 0, 3
-; CHECK-NEXT:    bne 0, .LBB2_2
-; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    bl dummy
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:  .LBB2_2: # %if.end
-; CHECK-NEXT:    li 3, 55
 ; CHECK-NEXT:    addi 1, 1, 32
 ; CHECK-NEXT:    ld 0, 16(1)
 ; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:  .LBB2_2: # %if.end
+; CHECK-NEXT:    li 3, 55
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-AIX-LABEL: test3:
 ; CHECK-AIX:       # %bb.0: # %entry
-; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stdu 1, -112(1)
-; CHECK-AIX-NEXT:    std 0, 128(1)
 ; CHECK-AIX-NEXT:    stwcx. 4, 0, 3
 ; CHECK-AIX-NEXT:    bne 0, L..BB2_2
 ; CHECK-AIX-NEXT:  # %bb.1: # %if.then
+; CHECK-AIX-NEXT:    mflr 0
+; CHECK-AIX-NEXT:    stdu 1, -112(1)
+; CHECK-AIX-NEXT:    std 0, 128(1)
 ; CHECK-AIX-NEXT:    bl .dummy[PR]
 ; CHECK-AIX-NEXT:    nop
-; CHECK-AIX-NEXT:  L..BB2_2: # %if.end
-; CHECK-AIX-NEXT:    li 3, 55
 ; CHECK-AIX-NEXT:    addi 1, 1, 112
 ; CHECK-AIX-NEXT:    ld 0, 16(1)
 ; CHECK-AIX-NEXT:    mtlr 0
+; CHECK-AIX-NEXT:  L..BB2_2: # %if.end
+; CHECK-AIX-NEXT:    li 3, 55
 ; CHECK-AIX-NEXT:    blr
 entry:
   %0 = tail call i32 @llvm.ppc.stwcx(ptr %addr, i32 %newval)
@@ -156,38 +156,38 @@ if.end:                                           ; preds = %if.then, %entry
 define dso_local i64 @test4(ptr noundef %addr, i64 noundef %newval) local_unnamed_addr #0 {
 ; CHECK-LABEL: test4:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stdcx. 4, 0, 3
+; CHECK-NEXT:    bne 0, .LBB3_2
+; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    mflr 0
 ; CHECK-NEXT:    stdu 1, -32(1)
 ; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    stdcx. 4, 0, 3
-; CHECK-NEXT:    bne 0, .LBB3_2
-; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    bl dummy
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:  .LBB3_2: # %if.end
-; CHECK-NEXT:    li 3, 55
 ; CHECK-NEXT:    addi 1, 1, 32
 ; CHECK-NEXT:    ld 0, 16(1)
 ; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:  .LBB3_2: # %if.end
+; CHECK-NEXT:    li 3, 55
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-AIX-LABEL: test4:
 ; CHECK-AIX:       # %bb.0: # %entry
-; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stdu 1, -112(1)
-; CHECK-AIX-NEXT:    std 0, 128(1)
 ; CHECK-AIX-NEXT:    stdcx. 4, 0, 3
 ; CHECK-AIX-NEXT:    bne 0, L..BB3_2
 ; CHECK-AIX-NEXT:  # %bb.1: # %if.then
+; CHECK-AIX-NEXT:    mflr 0
+; CHECK-AIX-NEXT:    stdu 1, -112(1)
+; CHECK-AIX-NEXT:    std 0, 128(1)
 ; CHECK-AIX-NEXT:    bl .dummy[PR]
 ; CHECK-AIX-NEXT:    nop
-; CHECK-AIX-NEXT:  L..BB3_2: # %if.end
-; CHECK-AIX-NEXT:    li 3, 55
 ; CHECK-AIX-NEXT:    addi 1, 1, 112
 ; CHECK-AIX-NEXT:    ld 0, 16(1)
 ; CHECK-AIX-NEXT:    mtlr 0
+; CHECK-AIX-NEXT:  L..BB3_2: # %if.end
+; CHECK-AIX-NEXT:    li 3, 55
 ; CHECK-AIX-NEXT:    blr
 entry:
   %0 = tail call i32 @llvm.ppc.stdcx(ptr %addr, i64 %newval)

diff  --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
index 94beed98d2457..5d2232319c1f5 100644
--- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
@@ -10,6 +10,14 @@ target triple = "powerpc64le-unknown-linux-gnu"
 define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %.vy02, ptr %.vy03, ptr %.vy04, ptr %.vy05, ptr %.vy06, ptr %.vy07, ptr %.vy08, ptr %.vy09, ptr %.vy0a, ptr %.vy0b, ptr %.vy0c, ptr %.vy21, ptr %.vy22, ptr %.vy23, ptr %.vy24, ptr %.vy25, ptr %.vy26, ptr %.vy27, ptr %.vy28, ptr %.vy29, ptr %.vy2a, ptr %.vy2b, ptr %.vy2c) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lwz 4, 0(4)
+; CHECK-NEXT:    cmpwi 4, 1
+; CHECK-NEXT:    bltlr 0
+; CHECK-NEXT:  # %bb.1: # %_loop_1_do_.lr.ph
+; CHECK-NEXT:    lwz 3, 0(3)
+; CHECK-NEXT:    cmpwi 3, 1
+; CHECK-NEXT:    bltlr 0
+; CHECK-NEXT:  # %bb.2: # %_loop_1_do_.preheader
 ; CHECK-NEXT:    stdu 1, -592(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 592
 ; CHECK-NEXT:    .cfi_offset r14, -192
@@ -48,34 +56,92 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %.
 ; CHECK-NEXT:    .cfi_offset v29, -240
 ; CHECK-NEXT:    .cfi_offset v30, -224
 ; CHECK-NEXT:    .cfi_offset v31, -208
-; CHECK-NEXT:    lwz 4, 0(4)
-; CHECK-NEXT:    std 14, 400(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 15, 408(1) # 8-byte Folded Spill
-; CHECK-NEXT:    cmpwi 4, 1
-; CHECK-NEXT:    std 16, 416(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 17, 424(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 22, 464(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 23, 472(1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr 22, 5
+; CHECK-NEXT:    ld 5, 848(1)
+; CHECK-NEXT:    mr 11, 7
+; CHECK-NEXT:    ld 23, 688(1)
+; CHECK-NEXT:    addi 3, 3, 1
+; CHECK-NEXT:    ld 2, 760(1)
+; CHECK-NEXT:    std 28, 512(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 29, 520(1) # 8-byte Folded Spill
+; CHECK-NEXT:    ld 28, 824(1)
+; CHECK-NEXT:    ld 7, 728(1)
 ; CHECK-NEXT:    std 18, 432(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 19, 440(1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr 18, 6
+; CHECK-NEXT:    ld 6, 712(1)
+; CHECK-NEXT:    cmpldi 3, 9
+; CHECK-NEXT:    ld 19, 768(1)
+; CHECK-NEXT:    std 10, 64(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 6, 72(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 5, 200(1) # 8-byte Folded Spill
+; CHECK-NEXT:    ld 5, 840(1)
+; CHECK-NEXT:    lxv 33, 0(6)
+; CHECK-NEXT:    std 14, 400(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 15, 408(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 30, 528(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 31, 536(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 20, 448(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 21, 456(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 22, 464(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 23, 472(1) # 8-byte Folded Spill
+; CHECK-NEXT:    ld 21, 784(1)
+; CHECK-NEXT:    ld 20, 776(1)
+; CHECK-NEXT:    lxv 10, 0(19)
+; CHECK-NEXT:    lxv 7, 0(21)
+; CHECK-NEXT:    ld 15, 736(1)
+; CHECK-NEXT:    ld 29, 704(1)
+; CHECK-NEXT:    ld 30, 720(1)
+; CHECK-NEXT:    std 2, 112(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 19, 120(1) # 8-byte Folded Spill
+; CHECK-NEXT:    lxv 42, 0(9)
+; CHECK-NEXT:    lxv 37, 0(7)
+; CHECK-NEXT:    std 20, 128(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 21, 136(1) # 8-byte Folded Spill
+; CHECK-NEXT:    lxv 43, 0(8)
+; CHECK-NEXT:    lxv 41, 0(10)
+; CHECK-NEXT:    std 16, 416(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 17, 424(1) # 8-byte Folded Spill
+; CHECK-NEXT:    ld 17, 752(1)
+; CHECK-NEXT:    ld 16, 744(1)
 ; CHECK-NEXT:    std 24, 480(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 25, 488(1) # 8-byte Folded Spill
+; CHECK-NEXT:    ld 25, 800(1)
+; CHECK-NEXT:    ld 24, 792(1)
 ; CHECK-NEXT:    std 26, 496(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 27, 504(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 28, 512(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 29, 520(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 30, 528(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 31, 536(1) # 8-byte Folded Spill
+; CHECK-NEXT:    ld 27, 816(1)
+; CHECK-NEXT:    ld 26, 808(1)
+; CHECK-NEXT:    std 8, 48(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 9, 56(1) # 8-byte Folded Spill
+; CHECK-NEXT:    lxv 40, 0(23)
+; CHECK-NEXT:    lxv 38, 0(29)
+; CHECK-NEXT:    std 7, 80(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 15, 88(1) # 8-byte Folded Spill
+; CHECK-NEXT:    lxv 32, 0(30)
+; CHECK-NEXT:    lxv 36, 0(15)
+; CHECK-NEXT:    mr 8, 29
+; CHECK-NEXT:    mr 10, 30
+; CHECK-NEXT:    std 26, 160(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 27, 168(1) # 8-byte Folded Spill
+; CHECK-NEXT:    lxv 13, 0(16)
+; CHECK-NEXT:    lxv 12, 0(17)
 ; CHECK-NEXT:    stfd 26, 544(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd 27, 552(1) # 8-byte Folded Spill
+; CHECK-NEXT:    lxv 11, 0(2)
+; CHECK-NEXT:    lxv 9, 0(20)
 ; CHECK-NEXT:    stfd 28, 560(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd 29, 568(1) # 8-byte Folded Spill
+; CHECK-NEXT:    lxv 5, 0(24)
+; CHECK-NEXT:    lxv 4, 0(25)
 ; CHECK-NEXT:    stfd 30, 576(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd 31, 584(1) # 8-byte Folded Spill
+; CHECK-NEXT:    lxv 2, 0(26)
+; CHECK-NEXT:    lxv 0, 0(27)
+; CHECK-NEXT:    li 27, 0
 ; CHECK-NEXT:    stxv 52, 208(1) # 16-byte Folded Spill
 ; CHECK-NEXT:    stxv 53, 224(1) # 16-byte Folded Spill
+; CHECK-NEXT:    lxv 1, 0(28)
 ; CHECK-NEXT:    stxv 54, 240(1) # 16-byte Folded Spill
 ; CHECK-NEXT:    stxv 55, 256(1) # 16-byte Folded Spill
 ; CHECK-NEXT:    stxv 56, 272(1) # 16-byte Folded Spill
@@ -86,58 +152,8 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %.
 ; CHECK-NEXT:    stxv 61, 352(1) # 16-byte Folded Spill
 ; CHECK-NEXT:    stxv 62, 368(1) # 16-byte Folded Spill
 ; CHECK-NEXT:    stxv 63, 384(1) # 16-byte Folded Spill
-; CHECK-NEXT:    blt 0, .LBB0_7
-; CHECK-NEXT:  # %bb.1: # %_loop_1_do_.lr.ph
-; CHECK-NEXT:    lwz 3, 0(3)
-; CHECK-NEXT:    cmpwi 3, 1
-; CHECK-NEXT:    blt 0, .LBB0_7
-; CHECK-NEXT:  # %bb.2: # %_loop_1_do_.preheader
-; CHECK-NEXT:    mr 22, 5
-; CHECK-NEXT:    ld 5, 848(1)
-; CHECK-NEXT:    ld 28, 824(1)
-; CHECK-NEXT:    mr 11, 7
-; CHECK-NEXT:    mr 18, 6
-; CHECK-NEXT:    addi 3, 3, 1
-; CHECK-NEXT:    std 8, 48(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 9, 56(1) # 8-byte Folded Spill
-; CHECK-NEXT:    ld 6, 712(1)
-; CHECK-NEXT:    ld 23, 688(1)
-; CHECK-NEXT:    std 10, 64(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 6, 72(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 5, 200(1) # 8-byte Folded Spill
-; CHECK-NEXT:    ld 5, 840(1)
-; CHECK-NEXT:    cmpldi 3, 9
-; CHECK-NEXT:    ld 19, 768(1)
-; CHECK-NEXT:    ld 2, 760(1)
-; CHECK-NEXT:    lxv 33, 0(6)
-; CHECK-NEXT:    ld 21, 784(1)
-; CHECK-NEXT:    ld 20, 776(1)
-; CHECK-NEXT:    std 2, 112(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 19, 120(1) # 8-byte Folded Spill
-; CHECK-NEXT:    lxv 10, 0(19)
-; CHECK-NEXT:    ld 7, 728(1)
-; CHECK-NEXT:    std 20, 128(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 21, 136(1) # 8-byte Folded Spill
-; CHECK-NEXT:    lxv 7, 0(21)
-; CHECK-NEXT:    ld 27, 816(1)
-; CHECK-NEXT:    std 27, 168(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 5, 192(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    ld 5, 832(1)
-; CHECK-NEXT:    ld 26, 808(1)
-; CHECK-NEXT:    ld 25, 800(1)
-; CHECK-NEXT:    ld 24, 792(1)
-; CHECK-NEXT:    ld 17, 752(1)
-; CHECK-NEXT:    ld 16, 744(1)
-; CHECK-NEXT:    ld 15, 736(1)
-; CHECK-NEXT:    ld 29, 704(1)
-; CHECK-NEXT:    ld 30, 720(1)
-; CHECK-NEXT:    lxv 42, 0(9)
-; CHECK-NEXT:    std 7, 80(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 15, 88(1) # 8-byte Folded Spill
-; CHECK-NEXT:    lxv 37, 0(7)
-; CHECK-NEXT:    lxv 43, 0(8)
-; CHECK-NEXT:    mr 8, 29
-; CHECK-NEXT:    std 26, 160(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 16, 96(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 17, 104(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 24, 144(1) # 8-byte Folded Spill
@@ -145,30 +161,14 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %.
 ; CHECK-NEXT:    std 5, 184(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 28, 176(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    ld 5, 696(1)
-; CHECK-NEXT:    lxv 41, 0(10)
-; CHECK-NEXT:    lxv 40, 0(23)
-; CHECK-NEXT:    lxv 38, 0(29)
-; CHECK-NEXT:    lxv 32, 0(30)
-; CHECK-NEXT:    lxv 36, 0(15)
-; CHECK-NEXT:    lxv 13, 0(16)
-; CHECK-NEXT:    lxv 12, 0(17)
-; CHECK-NEXT:    lxv 11, 0(2)
-; CHECK-NEXT:    lxv 9, 0(20)
-; CHECK-NEXT:    lxv 5, 0(24)
-; CHECK-NEXT:    lxv 4, 0(25)
-; CHECK-NEXT:    mr 10, 30
+; CHECK-NEXT:    li 28, 1
+; CHECK-NEXT:    ld 7, 184(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lxv 3, 0(7)
 ; CHECK-NEXT:    std 5, 32(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 23, 40(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    lwa 5, 0(11)
 ; CHECK-NEXT:    li 11, 9
-; CHECK-NEXT:    ld 7, 184(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lxv 2, 0(26)
-; CHECK-NEXT:    lxv 0, 0(27)
-; CHECK-NEXT:    lxv 1, 0(28)
-; CHECK-NEXT:    li 28, 1
-; CHECK-NEXT:    li 27, 0
 ; CHECK-NEXT:    ld 9, 32(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lxv 3, 0(7)
 ; CHECK-NEXT:    iselgt 3, 3, 11
 ; CHECK-NEXT:    addi 3, 3, -2
 ; CHECK-NEXT:    mulli 6, 5, 40
@@ -270,16 +270,52 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %.
 ; CHECK-NEXT:    ble 0, .LBB0_3
 ; CHECK-NEXT:  # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit
 ; CHECK-NEXT:    ld 3, 48(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lxv 63, 384(1) # 16-byte Folded Reload
 ; CHECK-NEXT:    stxv 43, 0(3)
 ; CHECK-NEXT:    ld 3, 56(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lxv 62, 368(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 61, 352(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 60, 336(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 59, 320(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 58, 304(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 57, 288(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 56, 272(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 55, 256(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 54, 240(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 53, 224(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 52, 208(1) # 16-byte Folded Reload
 ; CHECK-NEXT:    stxv 42, 0(3)
 ; CHECK-NEXT:    ld 3, 64(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 31, 584(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 30, 576(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 29, 568(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 28, 560(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 27, 552(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 26, 544(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 31, 536(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 30, 528(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 29, 520(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 28, 512(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 27, 504(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    stxv 41, 0(3)
 ; CHECK-NEXT:    ld 3, 40(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 26, 496(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 25, 488(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 24, 480(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 23, 472(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 22, 464(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 21, 456(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 20, 448(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 19, 440(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 18, 432(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 17, 424(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 16, 416(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    stxv 40, 0(3)
 ; CHECK-NEXT:    ld 3, 72(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    stxv 39, 0(9)
 ; CHECK-NEXT:    stxv 38, 0(8)
+; CHECK-NEXT:    ld 15, 408(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 14, 400(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    stxv 33, 0(3)
 ; CHECK-NEXT:    ld 3, 80(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    stxv 32, 0(10)
@@ -314,43 +350,6 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %.
 ; CHECK-NEXT:    stxv 8, 0(3)
 ; CHECK-NEXT:    ld 3, 200(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    stxv 6, 0(3)
-; CHECK-NEXT:  .LBB0_7: # %_return_bb
-; CHECK-NEXT:    lxv 63, 384(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 62, 368(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 61, 352(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 60, 336(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 59, 320(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 58, 304(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 57, 288(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 56, 272(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 55, 256(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 54, 240(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 53, 224(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 52, 208(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lfd 31, 584(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 30, 576(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 29, 568(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 28, 560(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 27, 552(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 26, 544(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 31, 536(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 30, 528(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 29, 520(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 28, 512(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 27, 504(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 26, 496(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 25, 488(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 24, 480(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 23, 472(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 22, 464(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 21, 456(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 20, 448(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 19, 440(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 18, 432(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 17, 424(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 16, 416(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 15, 408(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 14, 400(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi 1, 1, 592
 ; CHECK-NEXT:    blr
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll b/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
index d67f6a029a0d7..1a3aaaec037ab 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
@@ -1,10 +1,9 @@
 ; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=false | FileCheck %s -check-prefix=CHECK-SCO-ONLY
-; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-ONLY
+; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-SR
 ; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=false | FileCheck %s -check-prefix=CHECK-SCO-ONLY
-; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-ONLY
+; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sco=false --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-SR
 ; RUN: not --crash llc -relocation-model=pic -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix-xcoff -tailcallopt -disable-ppc-sco=false --enable-shrink-wrap=true 2>&1 | FileCheck %s -check-prefix=CHECK-AIX
 ;; The above RUN command is expected to fail on AIX since tail calling is not implemented ATM
-;; Edit: D63152 prevents stack popping before loads and stores, so shrink-wrap does nothing here
 %"class.clang::NamedDecl" = type { i32 }
 declare void @__assert_fail();
 
@@ -31,6 +30,12 @@ exit:
 ; CHECK-SCO-ONLY: #TC_RETURNd8
 ; CHECK-SCO-ONLY: bl __assert_fail
 ;
+; CHECK-SCO-SR-LABEL: _ZNK5clang9NamedDecl23getLinkageAndVisibilityEv:
+; CHECK-SCO-SR: b LVComputationKind
+; CHECK-SCO-SR: #TC_RETURNd8
+; CHECK-SCO-SR: stdu 1, -{{[0-9]+}}(1)
+; CHECK-SCO-SR: bl __assert_fail
+
 ; CHECK-AIX: LLVM ERROR: Tail call support is unimplemented on AIX.
 }
 

diff  --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
index 90dad26a1833f..0968f8afd7fd8 100644
--- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
+++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
@@ -139,34 +139,30 @@ if.end:                                           ; preds = %if.then, %entry
 define dso_local i32 @load_half() nounwind {
 ; RV32-LABEL: load_half:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    lui a0, %hi(foo+8)
 ; RV32-NEXT:    lhu a0, %lo(foo+8)(a0)
 ; RV32-NEXT:    li a1, 140
 ; RV32-NEXT:    bne a0, a1, .LBB8_2
 ; RV32-NEXT:  # %bb.1: # %if.end
 ; RV32-NEXT:    li a0, 0
-; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ; RV32-NEXT:  .LBB8_2: # %if.then
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    call abort@plt
 ;
 ; RV64-LABEL: load_half:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    lui a0, %hi(foo+8)
 ; RV64-NEXT:    lhu a0, %lo(foo+8)(a0)
 ; RV64-NEXT:    li a1, 140
 ; RV64-NEXT:    bne a0, a1, .LBB8_2
 ; RV64-NEXT:  # %bb.1: # %if.end
 ; RV64-NEXT:    li a0, 0
-; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
 ; RV64-NEXT:  .LBB8_2: # %if.then
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    call abort@plt
 entry:
   %0 = load i16, ptr getelementptr inbounds ([6 x i16], ptr @foo, i32 0, i32 4), align 2

diff  --git a/llvm/test/CodeGen/RISCV/pr51206.ll b/llvm/test/CodeGen/RISCV/pr51206.ll
index 7ae250bbc050c..b83903e7c55cd 100644
--- a/llvm/test/CodeGen/RISCV/pr51206.ll
+++ b/llvm/test/CodeGen/RISCV/pr51206.ll
@@ -11,8 +11,6 @@
 define signext i32 @wobble() nounwind {
 ; CHECK-LABEL: wobble:
 ; CHECK:       # %bb.0: # %bb
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    lui a0, %hi(global)
 ; CHECK-NEXT:    lbu a0, %lo(global)(a0)
 ; CHECK-NEXT:    lui a1, %hi(global.2)
@@ -31,11 +29,13 @@ define signext i32 @wobble() nounwind {
 ; CHECK-NEXT:    sw a1, %lo(global.3)(a2)
 ; CHECK-NEXT:    bltu a0, a3, .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %bb10
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    call quux@plt
-; CHECK-NEXT:  .LBB0_2: # %bb12
-; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:  .LBB0_2: # %bb12
+; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:    ret
 bb:
   %tmp = load i8, ptr @global, align 1

diff  --git a/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll b/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
index 07e19dd58fe5a..c52a54569b3dd 100644
--- a/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
@@ -1445,47 +1445,53 @@ if.end:
 define i1 @beq_to_bx(ptr %y, i32 %head) {
 ; ENABLE-V4T-LABEL: beq_to_bx:
 ; ENABLE-V4T:       @ %bb.0: @ %entry
-; ENABLE-V4T-NEXT:    push {r4, lr}
-; ENABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
-; ENABLE-V4T-NEXT:    .cfi_offset lr, -4
-; ENABLE-V4T-NEXT:    .cfi_offset r4, -8
 ; ENABLE-V4T-NEXT:    movs r2, r0
 ; ENABLE-V4T-NEXT:    movs r0, #1
 ; ENABLE-V4T-NEXT:    cmp r2, #0
 ; ENABLE-V4T-NEXT:    beq LBB11_3
 ; ENABLE-V4T-NEXT:  @ %bb.1: @ %if.end
+; ENABLE-V4T-NEXT:    push {r4, lr}
+; ENABLE-V4T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V4T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V4T-NEXT:    .cfi_offset r4, -8
 ; ENABLE-V4T-NEXT:    ldr r3, [r2]
 ; ENABLE-V4T-NEXT:    lsls r4, r3, #30
+; ENABLE-V4T-NEXT:    ldr r4, [sp, #4]
+; ENABLE-V4T-NEXT:    mov lr, r4
+; ENABLE-V4T-NEXT:    pop {r4}
+; ENABLE-V4T-NEXT:    add sp, #4
 ; ENABLE-V4T-NEXT:    bpl LBB11_3
 ; ENABLE-V4T-NEXT:  @ %bb.2: @ %if.end4
 ; ENABLE-V4T-NEXT:    str r1, [r2]
 ; ENABLE-V4T-NEXT:    str r3, [r2]
 ; ENABLE-V4T-NEXT:    movs r0, #0
 ; ENABLE-V4T-NEXT:  LBB11_3: @ %cleanup
-; ENABLE-V4T-NEXT:    pop {r4}
-; ENABLE-V4T-NEXT:    pop {r1}
-; ENABLE-V4T-NEXT:    bx r1
+; ENABLE-V4T-NEXT:    bx lr
 ;
 ; ENABLE-V5T-LABEL: beq_to_bx:
 ; ENABLE-V5T:       @ %bb.0: @ %entry
-; ENABLE-V5T-NEXT:    push {r4, lr}
-; ENABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
-; ENABLE-V5T-NEXT:    .cfi_offset lr, -4
-; ENABLE-V5T-NEXT:    .cfi_offset r4, -8
 ; ENABLE-V5T-NEXT:    movs r2, r0
 ; ENABLE-V5T-NEXT:    movs r0, #1
 ; ENABLE-V5T-NEXT:    cmp r2, #0
 ; ENABLE-V5T-NEXT:    beq LBB11_3
 ; ENABLE-V5T-NEXT:  @ %bb.1: @ %if.end
+; ENABLE-V5T-NEXT:    push {r4, lr}
+; ENABLE-V5T-NEXT:    .cfi_def_cfa_offset 8
+; ENABLE-V5T-NEXT:    .cfi_offset lr, -4
+; ENABLE-V5T-NEXT:    .cfi_offset r4, -8
 ; ENABLE-V5T-NEXT:    ldr r3, [r2]
 ; ENABLE-V5T-NEXT:    lsls r4, r3, #30
+; ENABLE-V5T-NEXT:    ldr r4, [sp, #4]
+; ENABLE-V5T-NEXT:    mov lr, r4
+; ENABLE-V5T-NEXT:    pop {r4}
+; ENABLE-V5T-NEXT:    add sp, #4
 ; ENABLE-V5T-NEXT:    bpl LBB11_3
 ; ENABLE-V5T-NEXT:  @ %bb.2: @ %if.end4
 ; ENABLE-V5T-NEXT:    str r1, [r2]
 ; ENABLE-V5T-NEXT:    str r3, [r2]
 ; ENABLE-V5T-NEXT:    movs r0, #0
 ; ENABLE-V5T-NEXT:  LBB11_3: @ %cleanup
-; ENABLE-V5T-NEXT:    pop {r4, pc}
+; ENABLE-V5T-NEXT:    bx lr
 ;
 ; DISABLE-V4T-LABEL: beq_to_bx:
 ; DISABLE-V4T:       @ %bb.0: @ %entry

diff  --git a/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll b/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
index 97e2db2a68cfa..0e57e9d135a4d 100644
--- a/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
+++ b/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
@@ -9,7 +9,6 @@
 define dso_local i32 @main() nounwind {
 ; CHECK-LABEL: main:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpq g_16(%rip), %rax
 ; CHECK-NEXT:    sbbl %eax, %eax
@@ -22,6 +21,7 @@ define dso_local i32 @main() nounwind {
 ; CHECK-NEXT:  .LBB0_1: # %entry.if.end_crit_edge
 ; CHECK-NEXT:    movl g_38(%rip), %esi
 ; CHECK-NEXT:  .LBB0_3: # %if.end
+; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    movl $.L.str, %edi
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    callq printf@PLT

diff  --git a/llvm/test/CodeGen/X86/cmov.ll b/llvm/test/CodeGen/X86/cmov.ll
index dbe85eced6a59..374e75967d52f 100644
--- a/llvm/test/CodeGen/X86/cmov.ll
+++ b/llvm/test/CodeGen/X86/cmov.ll
@@ -78,7 +78,6 @@ define void @test3(i64 %a, i64 %b, i1 %p) nounwind {
 define i1 @test4() nounwind {
 ; CHECK-LABEL: test4:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    movsbl g_3(%rip), %edx
 ; CHECK-NEXT:    movzbl %dl, %ecx
 ; CHECK-NEXT:    shrl $7, %ecx
@@ -91,6 +90,7 @@ define i1 @test4() nounwind {
 ; CHECK-NEXT:  # %bb.1: # %bb.i.i.i
 ; CHECK-NEXT:    movzbl g_100(%rip), %ecx
 ; CHECK-NEXT:  .LBB3_2: # %func_4.exit.i
+; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    setne %bl

diff  --git a/llvm/test/CodeGen/X86/copy-eflags.ll b/llvm/test/CodeGen/X86/copy-eflags.ll
index 15833fa3f0c08..6af80860401af 100644
--- a/llvm/test/CodeGen/X86/copy-eflags.ll
+++ b/llvm/test/CodeGen/X86/copy-eflags.ll
@@ -43,7 +43,6 @@ define dso_local i32 @test1() nounwind {
 ;
 ; X64-LABEL: test1:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    movzbl b(%rip), %ecx
 ; X64-NEXT:    leal 1(%rcx), %eax
 ; X64-NEXT:    movb %al, b(%rip)
@@ -57,11 +56,12 @@ define dso_local i32 @test1() nounwind {
 ; X64-NEXT:    testb %dl, %dl
 ; X64-NEXT:    jne .LBB0_2
 ; X64-NEXT:  # %bb.1: # %if.then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    movsbl %al, %edi
 ; X64-NEXT:    callq external
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB0_2: # %if.end
 ; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    popq %rcx
 ; X64-NEXT:    retq
 entry:
   %bval = load i8, ptr @b

diff  --git a/llvm/test/CodeGen/X86/csr-split.ll b/llvm/test/CodeGen/X86/csr-split.ll
index 81057d80ffdcb..460e705d1847a 100644
--- a/llvm/test/CodeGen/X86/csr-split.ll
+++ b/llvm/test/CodeGen/X86/csr-split.ll
@@ -9,18 +9,15 @@
 define dso_local signext i32 @test1(ptr %b) local_unnamed_addr  {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset %rbx, -16
 ; CHECK-NEXT:    movslq a(%rip), %rax
 ; CHECK-NEXT:    cmpq %rdi, %rax
 ; CHECK-NEXT:    je .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %if.end
-; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 ; CHECK-NEXT:  .LBB0_2: # %if.then
+; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbx, -16
 ; CHECK-NEXT:    movq %rdi, %rbx
 ; CHECK-NEXT:    callq callVoid@PLT
 ; CHECK-NEXT:    movq %rbx, %rdi

diff  --git a/llvm/test/CodeGen/X86/i686-win-shrink-wrapping.ll b/llvm/test/CodeGen/X86/i686-win-shrink-wrapping.ll
index ee2332c725d1b..5cb1b8f4a89e4 100644
--- a/llvm/test/CodeGen/X86/i686-win-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/i686-win-shrink-wrapping.ll
@@ -11,10 +11,6 @@ target triple = "i686-pc-windows-msvc18.0.0"
 define x86_thiscallcc void @stackRealignment(ptr %this) {
 ; SHRINK-WRAP-LABEL: stackRealignment:
 ; SHRINK-WRAP:       # %bb.0: # %entry
-; SHRINK-WRAP-NEXT:    pushl %ebp
-; SHRINK-WRAP-NEXT:    movl %esp, %ebp
-; SHRINK-WRAP-NEXT:    andl $-8, %esp
-; SHRINK-WRAP-NEXT:    subl $16, %esp
 ; SHRINK-WRAP-NEXT:    movl (%ecx), %eax
 ; SHRINK-WRAP-NEXT:    cmpl $33, %eax
 ; SHRINK-WRAP-NEXT:    movl $42, %edx
@@ -22,6 +18,10 @@ define x86_thiscallcc void @stackRealignment(ptr %this) {
 ; SHRINK-WRAP-NEXT:  # %bb.1: # %entry
 ; SHRINK-WRAP-NEXT:    movl $128, %edx
 ; SHRINK-WRAP-NEXT:  LBB0_2: # %entry
+; SHRINK-WRAP-NEXT:    pushl %ebp
+; SHRINK-WRAP-NEXT:    movl %esp, %ebp
+; SHRINK-WRAP-NEXT:    andl $-8, %esp
+; SHRINK-WRAP-NEXT:    subl $16, %esp
 ; SHRINK-WRAP-NEXT:    movl %edx, {{[0-9]+}}(%esp)
 ; SHRINK-WRAP-NEXT:    cmpl $32, %eax
 ; SHRINK-WRAP-NEXT:    jl LBB0_4

diff  --git a/llvm/test/CodeGen/X86/inline-asm-flag-output.ll b/llvm/test/CodeGen/X86/inline-asm-flag-output.ll
index e2c407ce264c2..0afdb740233d9 100644
--- a/llvm/test/CodeGen/X86/inline-asm-flag-output.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-flag-output.ll
@@ -888,15 +888,15 @@ define void @test_cca_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_cca_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jbe .LBB28_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB28_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@cca},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -928,15 +928,15 @@ define void @test_ccae_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccae_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jb .LBB29_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB29_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccae},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -968,15 +968,15 @@ define void @test_ccb_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccb_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jae .LBB30_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB30_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccb},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1008,15 +1008,15 @@ define void @test_ccbe_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccbe_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    ja .LBB31_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB31_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccbe},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1048,15 +1048,15 @@ define void @test_ccc_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccc_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jae .LBB32_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB32_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccc},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1088,15 +1088,15 @@ define void @test_cce_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_cce_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jne .LBB33_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB33_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@cce},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1128,15 +1128,15 @@ define void @test_ccz_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccz_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jne .LBB34_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB34_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccz},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1168,15 +1168,15 @@ define void @test_ccg_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccg_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jle .LBB35_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB35_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccg},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1208,15 +1208,15 @@ define void @test_ccge_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccge_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jl .LBB36_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB36_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccge},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1248,15 +1248,15 @@ define void @test_ccl_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccl_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jge .LBB37_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB37_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccl},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1288,15 +1288,15 @@ define void @test_ccle_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccle_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jg .LBB38_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB38_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccle},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1328,15 +1328,15 @@ define void @test_ccna_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccna_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    ja .LBB39_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB39_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccna},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1368,15 +1368,15 @@ define void @test_ccnae_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccnae_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jae .LBB40_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB40_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccnae},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1408,15 +1408,15 @@ define void @test_ccnb_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccnb_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jb .LBB41_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB41_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccnb},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1448,15 +1448,15 @@ define void @test_ccnbe_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccnbe_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jbe .LBB42_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB42_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccnbe},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1488,15 +1488,15 @@ define void @test_ccnc_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccnc_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jb .LBB43_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB43_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccnc},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1528,15 +1528,15 @@ define void @test_ccne_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccne_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    je .LBB44_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB44_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccne},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1568,15 +1568,15 @@ define void @test_ccnz_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccnz_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    je .LBB45_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB45_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccnz},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1608,15 +1608,15 @@ define void @test_ccng_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccng_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jg .LBB46_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB46_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccng},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1648,15 +1648,15 @@ define void @test_ccnge_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccnge_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jge .LBB47_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB47_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccnge},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1688,15 +1688,15 @@ define void @test_ccnl_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccnl_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jl .LBB48_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB48_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccnl},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1728,15 +1728,15 @@ define void @test_ccnle_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccnle_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jle .LBB49_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB49_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccnle},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1768,15 +1768,15 @@ define void @test_ccno_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccno_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jo .LBB50_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB50_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccno},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1808,15 +1808,15 @@ define void @test_ccnp_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccnp_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jp .LBB51_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB51_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccnp},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1848,15 +1848,15 @@ define void @test_ccns_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccns_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    js .LBB52_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB52_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccns},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1888,15 +1888,15 @@ define void @test_cco_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_cco_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jno .LBB53_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB53_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@cco},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1928,15 +1928,15 @@ define void @test_ccp_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccp_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jnp .LBB54_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB54_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccp},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind
@@ -1968,15 +1968,15 @@ define void @test_ccs_branch(i64 %nr, ptr %addr) nounwind {
 ;
 ; X64-LABEL: test_ccs_branch:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    cmp %rdi,(%rsi)
 ; X64-NEXT:    #NO_APP
 ; X64-NEXT:    jns .LBB55_2
 ; X64-NEXT:  # %bb.1: # %then
+; X64-NEXT:    pushq %rax
 ; X64-NEXT:    callq bar@PLT
+; X64-NEXT:    addq $8, %rsp
 ; X64-NEXT:  .LBB55_2: # %exit
-; X64-NEXT:    popq %rax
 ; X64-NEXT:    retq
 entry:
   %cc = tail call i8 asm "cmp $2,$1", "={@ccs},=*m,r,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) %addr, i64 %nr) nounwind

diff  --git a/llvm/test/CodeGen/X86/peep-test-5.ll b/llvm/test/CodeGen/X86/peep-test-5.ll
index 8284824741d11..52bcbe9f83d7a 100644
--- a/llvm/test/CodeGen/X86/peep-test-5.ll
+++ b/llvm/test/CodeGen/X86/peep-test-5.ll
@@ -12,25 +12,22 @@
 define void @decref(ptr %p) {
 ; CHECK-LABEL: decref:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    movl (%rdi), %eax
 ; CHECK-NEXT:    cmpl $1, %eax
 ; CHECK-NEXT:    jne .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %bb_free
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq free_object@PLT
-; CHECK-NEXT:  .LBB0_4: # %end
-; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    addq $8, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB0_4: # %end
 ; CHECK-NEXT:    retq
 ; CHECK-NEXT:  .LBB0_2: # %bb2
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    jle .LBB0_4
 ; CHECK-NEXT:  # %bb.3: # %bb_dec
 ; CHECK-NEXT:    decl %eax
 ; CHECK-NEXT:    movl %eax, (%rdi)
-; CHECK-NEXT:    popq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
   %count = load i32, ptr %p, align 4
   %cmp0 = icmp eq i32 %count, 1

diff  --git a/llvm/test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll b/llvm/test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll
index ba3233943418d..0793c33d2e8eb 100644
--- a/llvm/test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll
+++ b/llvm/test/CodeGen/X86/shrink-wrap-chkstk-x86_64.ll
@@ -9,10 +9,6 @@
 define void @fn1() nounwind uwtable {
 ; CHECK-LABEL: fn1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl $4136, %eax # imm = 0x1028
-; CHECK-NEXT:    callq ___chkstk_ms
-; CHECK-NEXT:    subq %rax, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 4144
 ; CHECK-NEXT:    movl a(%rip), %eax
 ; CHECK-NEXT:    testl %eax, %eax
 ; CHECK-NEXT:    jne .LBB0_2
@@ -24,6 +20,12 @@ define void @fn1() nounwind uwtable {
 ; CHECK-NEXT:    shrq $32, %rax
 ; CHECK-NEXT:    addl %ecx, %eax
 ; CHECK-NEXT:  .LBB0_2: # %select.end
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    movl $4128, %eax # imm = 0x1020
+; CHECK-NEXT:    callq ___chkstk_ms
+; CHECK-NEXT:    subq %rax, %rsp
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 4144
 ; CHECK-NEXT:    movl %eax, b(%rip)
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; CHECK-NEXT:    # kill: def $ecx killed $ecx killed $rcx

diff  --git a/llvm/test/CodeGen/X86/statepoint-vector.ll b/llvm/test/CodeGen/X86/statepoint-vector.ll
index 5ce81592b8b0b..c8ddb89ee5698 100644
--- a/llvm/test/CodeGen/X86/statepoint-vector.ll
+++ b/llvm/test/CodeGen/X86/statepoint-vector.ll
@@ -57,10 +57,10 @@ entry:
 define <2 x ptr addrspace(1)> @test3(i1 %cnd, ptr %ptr) gc "statepoint-example" {
 ; CHECK-LABEL: test3:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq $40, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    testb $1, %dil
 ; CHECK-NEXT:    movaps (%rsi), %xmm0
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    movaps %xmm0, (%rsp)
 ; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    callq do_safepoint@PLT

diff  --git a/llvm/test/DebugInfo/X86/merge_inlined_loc.ll b/llvm/test/DebugInfo/X86/merge_inlined_loc.ll
index 07a72d6d30d70..41dcbe4785304 100644
--- a/llvm/test/DebugInfo/X86/merge_inlined_loc.ll
+++ b/llvm/test/DebugInfo/X86/merge_inlined_loc.ll
@@ -26,7 +26,9 @@
 
 ; CHECK: .loc 1 2 25 epilogue_begin
 ; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .Ltmp{{.*}}:
 ; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: .loc 1 2 25 is_stmt 0
 ; CHECK-NEXT: jmp _Z6commonv
 ; CHECK-NEXT: [[LABEL:.*]]:
 


        


More information about the llvm-commits mailing list