[llvm] eb7d16e - [PowerPC] make expensive mflr be away from its user in the function prologue

Chen Zheng via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 14 18:14:37 PST 2022


Author: Chen Zheng
Date: 2022-11-14T21:14:20-05:00
New Revision: eb7d16ea25649909373e324e6ebf36774cabdbfa

URL: https://github.com/llvm/llvm-project/commit/eb7d16ea25649909373e324e6ebf36774cabdbfa
DIFF: https://github.com/llvm/llvm-project/commit/eb7d16ea25649909373e324e6ebf36774cabdbfa.diff

LOG: [PowerPC] make expensive mflr be away from its user in the function prologue

mflr is relatively expensive on Power processors older than Power10, so we
should schedule the store of the mflr's result away from the mflr itself.

In the epilogue, the expensive mtlr has no user for its def, so it doesn't
matter that the load and the mtlr are back-to-back.

Reviewed By: RolandF

Differential Revision: https://reviews.llvm.org/D137423

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
    llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
    llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
    llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
    llvm/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
    llvm/test/CodeGen/PowerPC/CSR-fit.ll
    llvm/test/CodeGen/PowerPC/Frames-dyn-alloca-with-func-call.ll
    llvm/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll
    llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll
    llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
    llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
    llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll
    llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
    llvm/test/CodeGen/PowerPC/aix-cc-ext-vec-abi.ll
    llvm/test/CodeGen/PowerPC/aix-crspill.ll
    llvm/test/CodeGen/PowerPC/aix-csr.ll
    llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable.ll
    llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
    llvm/test/CodeGen/PowerPC/aix-llvm-intrinsic.ll
    llvm/test/CodeGen/PowerPC/aix-lr.ll
    llvm/test/CodeGen/PowerPC/aix-sret-param.ll
    llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
    llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
    llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
    llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
    llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
    llvm/test/CodeGen/PowerPC/aix-user-defined-memcpy.ll
    llvm/test/CodeGen/PowerPC/aix-vec-arg-spills.ll
    llvm/test/CodeGen/PowerPC/aix-vector-stack-caller.ll
    llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll
    llvm/test/CodeGen/PowerPC/aix-xcoff-symbol-rename.ll
    llvm/test/CodeGen/PowerPC/all-atomics.ll
    llvm/test/CodeGen/PowerPC/alloca-crspill.ll
    llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
    llvm/test/CodeGen/PowerPC/atomics-i128.ll
    llvm/test/CodeGen/PowerPC/atomics-indexed.ll
    llvm/test/CodeGen/PowerPC/atomics.ll
    llvm/test/CodeGen/PowerPC/branch-on-store-cond.ll
    llvm/test/CodeGen/PowerPC/byval.ll
    llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
    llvm/test/CodeGen/PowerPC/constant-pool.ll
    llvm/test/CodeGen/PowerPC/csr-split.ll
    llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
    llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
    llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll
    llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll
    llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll
    llvm/test/CodeGen/PowerPC/expand-foldable-isel.ll
    llvm/test/CodeGen/PowerPC/f128-aggregates.ll
    llvm/test/CodeGen/PowerPC/f128-arith.ll
    llvm/test/CodeGen/PowerPC/f128-branch-cond.ll
    llvm/test/CodeGen/PowerPC/f128-compare.ll
    llvm/test/CodeGen/PowerPC/f128-conv.ll
    llvm/test/CodeGen/PowerPC/f128-fma.ll
    llvm/test/CodeGen/PowerPC/f128-passByValue.ll
    llvm/test/CodeGen/PowerPC/f128-rounding.ll
    llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll
    llvm/test/CodeGen/PowerPC/fast-isel-branch.ll
    llvm/test/CodeGen/PowerPC/float-load-store-pair.ll
    llvm/test/CodeGen/PowerPC/fmf-propagation.ll
    llvm/test/CodeGen/PowerPC/fminnum.ll
    llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll
    llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
    llvm/test/CodeGen/PowerPC/fp-strict-conv-spe.ll
    llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
    llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll
    llvm/test/CodeGen/PowerPC/fp-strict-round.ll
    llvm/test/CodeGen/PowerPC/fp-strict.ll
    llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
    llvm/test/CodeGen/PowerPC/frem.ll
    llvm/test/CodeGen/PowerPC/funnel-shift.ll
    llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
    llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
    llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll
    llvm/test/CodeGen/PowerPC/larger-than-red-zone.ll
    llvm/test/CodeGen/PowerPC/lower-intrinsics-afn-mass_notail.ll
    llvm/test/CodeGen/PowerPC/lower-intrinsics-fast-mass_notail.ll
    llvm/test/CodeGen/PowerPC/machine-pre.ll
    llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
    llvm/test/CodeGen/PowerPC/no-duplicate.ll
    llvm/test/CodeGen/PowerPC/not-fixed-frame-object.ll
    llvm/test/CodeGen/PowerPC/out-of-range-dform.ll
    llvm/test/CodeGen/PowerPC/pcrel_ldst.ll
    llvm/test/CodeGen/PowerPC/pow-025-075-intrinsic-scalar-mass-fast.ll
    llvm/test/CodeGen/PowerPC/pow-025-075-nointrinsic-scalar-mass-fast.ll
    llvm/test/CodeGen/PowerPC/ppc-prologue.ll
    llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
    llvm/test/CodeGen/PowerPC/ppc32-nest.ll
    llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
    llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll
    llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll
    llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll
    llvm/test/CodeGen/PowerPC/ppc64-nest.ll
    llvm/test/CodeGen/PowerPC/ppc64-notoc-rm-relocation.ll
    llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll
    llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
    llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
    llvm/test/CodeGen/PowerPC/ppcf128-endian.ll
    llvm/test/CodeGen/PowerPC/pr33547.ll
    llvm/test/CodeGen/PowerPC/pr36292.ll
    llvm/test/CodeGen/PowerPC/pr41088.ll
    llvm/test/CodeGen/PowerPC/pr43527.ll
    llvm/test/CodeGen/PowerPC/pr43976.ll
    llvm/test/CodeGen/PowerPC/pr44183.ll
    llvm/test/CodeGen/PowerPC/pr45301.ll
    llvm/test/CodeGen/PowerPC/pr45432.ll
    llvm/test/CodeGen/PowerPC/pr47373.ll
    llvm/test/CodeGen/PowerPC/pr48519.ll
    llvm/test/CodeGen/PowerPC/pr48527.ll
    llvm/test/CodeGen/PowerPC/pr49092.ll
    llvm/test/CodeGen/PowerPC/pr55463.ll
    llvm/test/CodeGen/PowerPC/pr56469.ll
    llvm/test/CodeGen/PowerPC/read-set-flm.ll
    llvm/test/CodeGen/PowerPC/recipest.ll
    llvm/test/CodeGen/PowerPC/reg-scavenging.ll
    llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.ll
    llvm/test/CodeGen/PowerPC/retaddr.ll
    llvm/test/CodeGen/PowerPC/retaddr2.ll
    llvm/test/CodeGen/PowerPC/retaddr_multi_levels.ll
    llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
    llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
    llvm/test/CodeGen/PowerPC/sms-phi-1.ll
    llvm/test/CodeGen/PowerPC/sms-phi-3.ll
    llvm/test/CodeGen/PowerPC/spe.ll
    llvm/test/CodeGen/PowerPC/srem-lkk.ll
    llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
    llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
    llvm/test/CodeGen/PowerPC/store_fptoi.ll
    llvm/test/CodeGen/PowerPC/tailcall-speculatable-callee.ll
    llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
    llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
    llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll
    llvm/test/CodeGen/PowerPC/urem-lkk.ll
    llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
    llvm/test/CodeGen/PowerPC/vector-reduce-fadd.ll
    llvm/test/DebugInfo/XCOFF/explicit-section.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 4138ea36856b3..1f17e9282404b 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -616,6 +616,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
   // AIX assembler does not support cfi directives.
   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
 
+  const bool HasFastMFLR = Subtarget.hasFastMFLR();
+
   // Get processor type.
   bool isPPC64 = Subtarget.isPPC64();
   // Get the ABI.
@@ -837,10 +839,11 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
   // Generate the instruction to store the LR. In the case where ROP protection
   // is required the register holding the LR should not be killed as it will be
   // used by the hash store instruction.
-  if (MustSaveLR) {
+  auto SaveLR = [&](int64_t Offset) {
+    assert(MustSaveLR && "LR is not required to be saved!");
     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
         .addReg(ScratchReg, getKillRegState(!HasROPProtect))
-        .addImm(LROffset)
+        .addImm(Offset)
         .addReg(SPReg);
 
     // Add the ROP protection Hash Store instruction.
@@ -861,7 +864,10 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
           .addImm(ImmOffset)
           .addReg(SPReg);
     }
-  }
+  };
+
+  if (MustSaveLR && HasFastMFLR)
+      SaveLR(LROffset);
 
   if (MustSaveCR &&
       !(SingleScratchReg && MustSaveLR)) {
@@ -873,8 +879,11 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
   }
 
   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
-  if (!FrameSize)
+  if (!FrameSize) {
+    if (MustSaveLR && !HasFastMFLR)
+      SaveLR(LROffset);
     return;
+  }
 
   // Adjust stack pointer: r1 += NegFrameSize.
   // If there is a preferred stack alignment, align R1 now
@@ -888,7 +897,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
 
   // Have we generated a STUX instruction to claim stack frame? If so,
   // the negated frame size will be placed in ScratchReg.
-  bool HasSTUX = false;
+  bool HasSTUX =
+      (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) ||
+      (HasBP && MaxAlign > 1) || isLargeFrame;
+
+  // If we use STUX to update the stack pointer, we need the two scratch
+  // registers TempReg and ScratchReg, we have to save LR here which is stored
+  // in ScratchReg.
+  if (HasSTUX && MustSaveLR && !HasFastMFLR)
+    SaveLR(LROffset);
 
   // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain
   // pointer is always stored at SP, we will get a free probe due to an essential
@@ -909,7 +926,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
           .addReg(ScratchReg)
           .addReg(SPReg);
-      HasSTUX = true;
     }
   } else {
     // This condition must be kept in sync with canUseAsPrologue.
@@ -941,21 +957,17 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
           .addReg(SPReg, RegState::Kill)
           .addReg(SPReg)
           .addReg(ScratchReg);
-      HasSTUX = true;
-
     } else if (!isLargeFrame) {
       BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
           .addReg(SPReg)
           .addImm(NegFrameSize)
           .addReg(SPReg);
-
     } else {
       TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize);
       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
           .addReg(SPReg, RegState::Kill)
           .addReg(SPReg)
           .addReg(ScratchReg);
-      HasSTUX = true;
     }
   }
 
@@ -1082,6 +1094,10 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
     }
   }
 
+  // Save the LR now.
+  if (!HasSTUX && MustSaveLR && !HasFastMFLR)
+    SaveLR(LROffset + FrameSize);
+
   // Add Call Frame Information for the instructions we generated above.
   if (needsCFI) {
     unsigned CFIIndex;

diff  --git a/llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll b/llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
index 9893d07834ab6..6e314a02590a6 100644
--- a/llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
@@ -54,8 +54,8 @@ define i32 @main() {
 ; CHECK-LABEL: main:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -16(1)
+; CHECK-NEXT:    stw 0, 20(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset lr, 4
 ; CHECK-NEXT:    bl foo

diff  --git a/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
index f9ebd53ca7736..db4e22fd8d17a 100644
--- a/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
@@ -10,8 +10,8 @@ define void @Bork(i64 %range.0.0, i64 %range.0.1, i64 %size) personality ptr @__
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
 ; CHECK-NEXT:    std 31, -8(1)
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -176(1)
+; CHECK-NEXT:    std 0, 192(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 176
 ; CHECK-NEXT:    .cfi_offset r31, -8
 ; CHECK-NEXT:    .cfi_offset lr, 16

diff  --git a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
index 1d9ab7e550713..0405b25e7fb03 100644
--- a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
@@ -5,11 +5,11 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
 ; CHECK-LABEL: __fixunstfdi:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -464(1)
 ; CHECK-NEXT:    mfcr 12
-; CHECK-NEXT:    stw 29, 412(1) # 4-byte Folded Spill
+; CHECK-NEXT:    stw 0, 468(1)
 ; CHECK-NEXT:    lis 3, .LCPI0_0@ha
+; CHECK-NEXT:    stw 29, 412(1) # 4-byte Folded Spill
 ; CHECK-NEXT:    stw 30, 416(1) # 4-byte Folded Spill
 ; CHECK-NEXT:    stw 12, 408(1)
 ; CHECK-NEXT:    stfd 2, 376(1)

diff  --git a/llvm/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll b/llvm/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
index 98f960b0a7ecb..e4deef157be81 100644
--- a/llvm/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
+++ b/llvm/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll
@@ -20,8 +20,8 @@ define ptr @g() nounwind readnone {
 ; CHECK-LABEL: g:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -16(1)
+; CHECK-NEXT:    stw 0, 20(1)
 ; CHECK-NEXT:    lwz 3, 0(1)
 ; CHECK-NEXT:    lwz 3, 0(3)
 ; CHECK-NEXT:    lwz 3, 4(3)

diff  --git a/llvm/test/CodeGen/PowerPC/CSR-fit.ll b/llvm/test/CodeGen/PowerPC/CSR-fit.ll
index 0f17776ec05cb..e88f38178e65e 100644
--- a/llvm/test/CodeGen/PowerPC/CSR-fit.ll
+++ b/llvm/test/CodeGen/PowerPC/CSR-fit.ll
@@ -17,11 +17,11 @@ define dso_local signext i32 @caller1(i32 signext %a, i32 signext %b) local_unna
 ; CHECK-PWR8-NEXT:    .cfi_offset r15, -136
 ; CHECK-PWR8-NEXT:    std r14, -144(r1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    std r15, -136(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    std r0, 16(r1)
 ; CHECK-PWR8-NEXT:    stdu r1, -176(r1)
 ; CHECK-PWR8-NEXT:    #APP
 ; CHECK-PWR8-NEXT:    add r3, r3, r4
 ; CHECK-PWR8-NEXT:    #NO_APP
+; CHECK-PWR8-NEXT:    std r0, 192(r1)
 ; CHECK-PWR8-NEXT:    extsw r3, r3
 ; CHECK-PWR8-NEXT:    bl callee
 ; CHECK-PWR8-NEXT:    nop
@@ -41,12 +41,12 @@ define dso_local signext i32 @caller1(i32 signext %a, i32 signext %b) local_unna
 ; CHECK-PWR9-NEXT:    .cfi_offset r15, -136
 ; CHECK-PWR9-NEXT:    std r14, -144(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r15, -136(r1) # 8-byte Folded Spill
-; CHECK-PWR9-NEXT:    std r0, 16(r1)
 ; CHECK-PWR9-NEXT:    stdu r1, -176(r1)
 ; CHECK-PWR9-NEXT:    #APP
 ; CHECK-PWR9-NEXT:    add r3, r3, r4
 ; CHECK-PWR9-NEXT:    #NO_APP
 ; CHECK-PWR9-NEXT:    extsw r3, r3
+; CHECK-PWR9-NEXT:    std r0, 192(r1)
 ; CHECK-PWR9-NEXT:    bl callee
 ; CHECK-PWR9-NEXT:    nop
 ; CHECK-PWR9-NEXT:    addi r1, r1, 176
@@ -71,11 +71,11 @@ define dso_local signext i32 @caller2(i32 signext %a, i32 signext %b) local_unna
 ; CHECK-PWR8-NEXT:    .cfi_offset f15, -136
 ; CHECK-PWR8-NEXT:    stfd f14, -144(r1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    stfd f15, -136(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    std r0, 16(r1)
 ; CHECK-PWR8-NEXT:    stdu r1, -176(r1)
 ; CHECK-PWR8-NEXT:    #APP
 ; CHECK-PWR8-NEXT:    add r3, r3, r4
 ; CHECK-PWR8-NEXT:    #NO_APP
+; CHECK-PWR8-NEXT:    std r0, 192(r1)
 ; CHECK-PWR8-NEXT:    extsw r3, r3
 ; CHECK-PWR8-NEXT:    bl callee
 ; CHECK-PWR8-NEXT:    nop
@@ -95,12 +95,12 @@ define dso_local signext i32 @caller2(i32 signext %a, i32 signext %b) local_unna
 ; CHECK-PWR9-NEXT:    .cfi_offset f15, -136
 ; CHECK-PWR9-NEXT:    stfd f14, -144(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-NEXT:    stfd f15, -136(r1) # 8-byte Folded Spill
-; CHECK-PWR9-NEXT:    std r0, 16(r1)
 ; CHECK-PWR9-NEXT:    stdu r1, -176(r1)
 ; CHECK-PWR9-NEXT:    #APP
 ; CHECK-PWR9-NEXT:    add r3, r3, r4
 ; CHECK-PWR9-NEXT:    #NO_APP
 ; CHECK-PWR9-NEXT:    extsw r3, r3
+; CHECK-PWR9-NEXT:    std r0, 192(r1)
 ; CHECK-PWR9-NEXT:    bl callee
 ; CHECK-PWR9-NEXT:    nop
 ; CHECK-PWR9-NEXT:    addi r1, r1, 176
@@ -119,8 +119,8 @@ define dso_local signext i32 @caller3(i32 signext %a, i32 signext %b) local_unna
 ; CHECK-PWR8-LABEL: caller3:
 ; CHECK-PWR8:       # %bb.0: # %entry
 ; CHECK-PWR8-NEXT:    mflr r0
-; CHECK-PWR8-NEXT:    std r0, 16(r1)
 ; CHECK-PWR8-NEXT:    stdu r1, -240(r1)
+; CHECK-PWR8-NEXT:    std r0, 256(r1)
 ; CHECK-PWR8-NEXT:    .cfi_def_cfa_offset 240
 ; CHECK-PWR8-NEXT:    .cfi_offset lr, 16
 ; CHECK-PWR8-NEXT:    .cfi_offset v20, -192
@@ -147,8 +147,8 @@ define dso_local signext i32 @caller3(i32 signext %a, i32 signext %b) local_unna
 ; CHECK-PWR9-LABEL: caller3:
 ; CHECK-PWR9:       # %bb.0: # %entry
 ; CHECK-PWR9-NEXT:    mflr r0
-; CHECK-PWR9-NEXT:    std r0, 16(r1)
 ; CHECK-PWR9-NEXT:    stdu r1, -224(r1)
+; CHECK-PWR9-NEXT:    std r0, 240(r1)
 ; CHECK-PWR9-NEXT:    .cfi_def_cfa_offset 224
 ; CHECK-PWR9-NEXT:    .cfi_offset lr, 16
 ; CHECK-PWR9-NEXT:    .cfi_offset v20, -192
@@ -177,8 +177,8 @@ define dso_local signext i32 @caller4(i32 signext %a, i32 signext %b) local_unna
 ; CHECK-PWR8-LABEL: caller4:
 ; CHECK-PWR8:       # %bb.0: # %entry
 ; CHECK-PWR8-NEXT:    mflr r0
-; CHECK-PWR8-NEXT:    std r0, 16(r1)
 ; CHECK-PWR8-NEXT:    stdu r1, -240(r1)
+; CHECK-PWR8-NEXT:    std r0, 256(r1)
 ; CHECK-PWR8-NEXT:    .cfi_def_cfa_offset 240
 ; CHECK-PWR8-NEXT:    .cfi_offset lr, 16
 ; CHECK-PWR8-NEXT:    .cfi_offset v20, -192
@@ -205,8 +205,8 @@ define dso_local signext i32 @caller4(i32 signext %a, i32 signext %b) local_unna
 ; CHECK-PWR9-LABEL: caller4:
 ; CHECK-PWR9:       # %bb.0: # %entry
 ; CHECK-PWR9-NEXT:    mflr r0
-; CHECK-PWR9-NEXT:    std r0, 16(r1)
 ; CHECK-PWR9-NEXT:    stdu r1, -224(r1)
+; CHECK-PWR9-NEXT:    std r0, 240(r1)
 ; CHECK-PWR9-NEXT:    .cfi_def_cfa_offset 224
 ; CHECK-PWR9-NEXT:    .cfi_offset lr, 16
 ; CHECK-PWR9-NEXT:    .cfi_offset v20, -192
@@ -235,8 +235,8 @@ define dso_local signext i32 @caller_mixed(i32 signext %a, i32 signext %b) local
 ; CHECK-PWR8-LABEL: caller_mixed:
 ; CHECK-PWR8:       # %bb.0: # %entry
 ; CHECK-PWR8-NEXT:    mflr r0
-; CHECK-PWR8-NEXT:    std r0, 16(r1)
 ; CHECK-PWR8-NEXT:    stdu r1, -528(r1)
+; CHECK-PWR8-NEXT:    std r0, 544(r1)
 ; CHECK-PWR8-NEXT:    .cfi_def_cfa_offset 528
 ; CHECK-PWR8-NEXT:    .cfi_offset lr, 16
 ; CHECK-PWR8-NEXT:    .cfi_offset r14, -288
@@ -269,8 +269,8 @@ define dso_local signext i32 @caller_mixed(i32 signext %a, i32 signext %b) local
 ; CHECK-PWR9-LABEL: caller_mixed:
 ; CHECK-PWR9:       # %bb.0: # %entry
 ; CHECK-PWR9-NEXT:    mflr r0
-; CHECK-PWR9-NEXT:    std r0, 16(r1)
 ; CHECK-PWR9-NEXT:    stdu r1, -512(r1)
+; CHECK-PWR9-NEXT:    std r0, 528(r1)
 ; CHECK-PWR9-NEXT:    .cfi_def_cfa_offset 512
 ; CHECK-PWR9-NEXT:    .cfi_offset lr, 16
 ; CHECK-PWR9-NEXT:    .cfi_offset r14, -288

diff  --git a/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca-with-func-call.ll b/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca-with-func-call.ll
index b745de067eed6..b27192190ba3c 100644
--- a/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca-with-func-call.ll
+++ b/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca-with-func-call.ll
@@ -49,9 +49,9 @@ declare i32 @bar(ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr)
 
 ; PPC32-LINUX-LABEL: foo
 ; PPC32-LINUX: mflr 0
-; PPC32-LINUX: stw 0, 4(1)
 ; PPC32-LINUX: stwu 1, -32(1)
 ; PPC32-LINUX: stw 31, 28(1)
+; PPC32-LINUX: stw 0, 36(1)
 ; PPC32-LINUX: mr 31, 1
 ; PPC32-LINUX: addi 3, 31, 32
 ; PPC32-LINUX: stwux 3, 1, 10
@@ -85,13 +85,13 @@ declare i32 @bar(ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr)
 ; PPC32-LINUX: blr
 
 ; PPC64-LABEL: foo
-; PPC64: mflr 0
-; PPC64: std 31, -8(1)
-; PPC64: std 0, 16(1)
-; PPC64: stdu 1, -160(1)
-; PPC64: mr 31, 1
-; PPC64: addi 3, 31, 160
-; PPC64: stdux 3, 1, 10
+; PPC64:     mflr 0
+; PPC64:     std 31, -8(1)
+; PPC64:     stdu 1, -160(1)
+; PPC64-DAG: mr 31, 1
+; PPC64-DAG: std 0, 176(1)
+; PPC64:     addi 3, 31, 160
+; PPC64:     stdux 3, 1, 10
 
 ; Allocated area is referred by stack pointer.
 ; PPC64: addi 11, 1, 128
@@ -122,9 +122,9 @@ declare i32 @bar(ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr)
 
 ; PPC32-AIX: mflr 0
 ; PPC32-AIX: stw 31, -4(1)
-; PPC32-AIX: stw 0, 8(1)
 ; PPC32-AIX: stwu 1, -80(1)
 ; PPC32-AIX: mr 31, 1
+; PPC32-AIX: stw 0, 88(1)
 ; PPC32-AIX: addi 3, 31, 80
 ; PPC32-AIX: stwux 3, 1, 10
 

diff  --git a/llvm/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll b/llvm/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll
index 88bd0ae5498a5..c8a643679594d 100644
--- a/llvm/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll
+++ b/llvm/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll
@@ -18,9 +18,9 @@ define noalias ptr @_ZN2CC3funEv(ptr %this) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -48(1)
 ; CHECK-NEXT:    std 2, 24(1)
+; CHECK-NEXT:    std 0, 64(1)
 ; CHECK-NEXT:    mr 30, 3
 ; CHECK-NEXT:    ld 12, 0(3)
 ; CHECK-NEXT:    mtctr 12

diff  --git a/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll b/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll
index a18a5ca4cf4b0..d22f8102f14b8 100644
--- a/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll
+++ b/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll
@@ -157,10 +157,10 @@ define dso_local void @test_Array() nounwind {
 ; CHECK-LE-LABEL: test_Array:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    mflr r0
-; CHECK-LE-NEXT:    std r0, 16(r1)
 ; CHECK-LE-NEXT:    stdu r1, -176(r1)
 ; CHECK-LE-NEXT:    addis r4, r2, Arr1@toc@ha
 ; CHECK-LE-NEXT:    li r3, 0
+; CHECK-LE-NEXT:    std r0, 192(r1)
 ; CHECK-LE-NEXT:    li r6, 65
 ; CHECK-LE-NEXT:    addi r5, r1, 46
 ; CHECK-LE-NEXT:    addi r4, r4, Arr1@toc@l
@@ -190,11 +190,11 @@ define dso_local void @test_Array() nounwind {
 ; CHECK-BE-LABEL: test_Array:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    mflr r0
-; CHECK-BE-NEXT:    std r0, 16(r1)
 ; CHECK-BE-NEXT:    stdu r1, -256(r1)
 ; CHECK-BE-NEXT:    addis r5, r2, Arr1@toc@ha
 ; CHECK-BE-NEXT:    li r3, 0
 ; CHECK-BE-NEXT:    addi r5, r5, Arr1@toc@l
+; CHECK-BE-NEXT:    std r0, 272(r1)
 ; CHECK-BE-NEXT:    addi r4, r1, 126
 ; CHECK-BE-NEXT:    li r6, 65
 ; CHECK-BE-NEXT:    stw r3, 124(r1)

diff  --git a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
index 1209e0b544e24..649aaf404b8c3 100644
--- a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
+++ b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
@@ -12,9 +12,9 @@ define signext i32 @main() nounwind {
 ; CHECK-LABEL: main:
 ; CHECK:       # %bb.0: # %L.entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -48(1)
 ; CHECK-NEXT:    li 3, -32477
+; CHECK-NEXT:    std 0, 64(1)
 ; CHECK-NEXT:    li 4, 234
 ; CHECK-NEXT:    addi 6, 1, 46
 ; CHECK-NEXT:    sth 3, 46(1)
@@ -65,16 +65,16 @@ define signext i32 @main() nounwind {
 ; CHECK-P7-LABEL: main:
 ; CHECK-P7:       # %bb.0: # %L.entry
 ; CHECK-P7-NEXT:    mflr 0
-; CHECK-P7-NEXT:    std 0, 16(1)
 ; CHECK-P7-NEXT:    stdu 1, -48(1)
 ; CHECK-P7-NEXT:    li 3, -32477
 ; CHECK-P7-NEXT:    lis 5, 0
 ; CHECK-P7-NEXT:    addi 4, 1, 46
 ; CHECK-P7-NEXT:    li 7, 0
+; CHECK-P7-NEXT:    std 0, 64(1)
 ; CHECK-P7-NEXT:    sth 3, 46(1)
 ; CHECK-P7-NEXT:    li 6, 234
-; CHECK-P7-NEXT:    ori 5, 5, 33059
 ; CHECK-P7-NEXT:    rlwinm 3, 4, 3, 27, 27
+; CHECK-P7-NEXT:    ori 5, 5, 33059
 ; CHECK-P7-NEXT:    ori 7, 7, 65535
 ; CHECK-P7-NEXT:    sync
 ; CHECK-P7-NEXT:    slw 6, 6, 3

diff  --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
index 5fe253475bf95..02fe9943f39c4 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
@@ -478,6 +478,7 @@ entry:
 
 ; ASM32PWR4:       stwu 1, -128(1)
 ; ASM32PWR4-NEXT:  lwz [[REG:[0-9]+]], L..C2(2)
+; ASM32PWR4-NEXT:  stw 0, 136(1)
 ; ASM32PWR4-NEXT:  lfd 1, 0([[REG]])
 ; ASM32PWR4-DAG:   stfd 1, 56(1)
 ; ASM32PWR4-DAG:   stfd 1, 64(1)
@@ -529,6 +530,7 @@ entry:
 
 ; ASM64PWR4:       stdu 1, -160(1)
 ; ASM64PWR4-NEXT:  ld [[REG:[0-9]+]], L..C2(2)
+; ASM64PWR4-NEXT:  std 0, 176(1)
 ; ASM64PWR4-NEXT:  lfd 1, 0([[REG]])
 ; ASM64PWR4-DAG:   stfd 1, 112(1)
 ; ASM64PWR4-DAG:   stfd 1, 120(1)
@@ -719,6 +721,7 @@ declare void @test_vararg(i32, ...)
 
 ; ASM32PWR4:      stwu 1, -80(1)
 ; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], L..C1(2)
+; ASM32PWR4-NEXT: stw 0, 88(1)
 ; ASM32PWR4-NEXT: lfs 1, 0([[REG]])
 ; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], L..C2(2)
 ; ASM32PWR4-NEXT: stfd 1, 64(1)
@@ -747,6 +750,7 @@ declare void @test_vararg(i32, ...)
 
 ; ASM64PWR4:      stdu 1, -128(1)
 ; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], L..C1(2)
+; ASM64PWR4-NEXT: std 0, 144(1)
 ; ASM64PWR4-NEXT: lfs 1, 0([[REG]])
 ; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], L..C2(2)
 ; ASM64PWR4-NEXT: stfd 1, 112(1)
@@ -787,6 +791,7 @@ entry:
 
 ; ASM32PWR4:      stwu 1, -80(1)
 ; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], L..C1(2)
+; ASM32PWR4-NEXT: stw 0, 88(1)
 ; ASM32PWR4-NEXT: li 6, 42
 ; ASM32PWR4-NEXT: lfs 1, 0([[REG]])
 ; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], L..C2(2)
@@ -817,6 +822,7 @@ entry:
 
 ; ASM64PWR4:      stdu 1, -128(1)
 ; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], L..C1(2)
+; ASM64PWR4-NEXT: std 0, 144(1)
 ; ASM64PWR4-NEXT: li 5, 42
 ; ASM64PWR4-NEXT: lfs 1, 0([[REG]])
 ; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], L..C2(2)
@@ -890,6 +896,7 @@ entry:
 
 ; ASM64PWR4:      stdu 1, -128(1)
 ; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], L..C1(2)
+; ASM64PWR4-NEXT: std 0, 144(1)
 ; ASM64PWR4-NEXT: li 5, 42
 ; ASM64PWR4-NEXT: lfs 1, 0([[REG]])
 ; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], L..C2(2)
@@ -922,6 +929,7 @@ entry:
 
 ; ASM32PWR4:      stwu 1, -64(1)
 ; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], L..C1(2)
+; ASM32PWR4-NEXT: stw 0, 72(1)
 ; ASM32PWR4-NEXT: lfs 1, 0([[REG]])
 ; ASM32PWR4-NEXT: li 3, 42
 ; ASM32PWR4-NEXT: stfs 1, 60(1)
@@ -940,6 +948,7 @@ entry:
 
 ; ASM64PWR4:      stdu 1, -128(1)
 ; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], L..C1(2)
+; ASM64PWR4-NEXT: std 0, 144(1)
 ; ASM64PWR4-NEXT: lfs 1, 0([[REG]])
 ; ASM64PWR4-NEXT: li 3, 42
 ; ASM64PWR4-NEXT: stfs 1, 124(1)
@@ -1554,8 +1563,8 @@ entry:
 ; CHECKASM-LABEL:  .caller_ints_stack:
 
 ; ASM32PWR4:        mflr 0
-; ASM32PWR4-DAG:    stw 0, 8(1)
-; ASM32PWR4-DAG:    stwu 1, -96(1)
+; ASM32PWR4-NEXT:   stwu 1, -96(1)
+; ASM32PWR4-DAG:    stw 0, 104(1)
 ; ASM32PWR4-DAG:    li 3, 1
 ; ASM32PWR4-DAG:    li 4, 2
 ; ASM32PWR4-DAG:    li 5, 3
@@ -1600,8 +1609,8 @@ entry:
 ; ASM32PWR4-NEXT:   blr
 
 ; ASM64PWR4:        mflr 0
-; ASM64PWR4-DAG:    std 0, 16(1)
-; ASM64PWR4-DAG:    stdu 1, -176(1)
+; ASM64PWR4-NEXT:   stdu 1, -176(1)
+; ASM64PWR4-DAG:    std 0, 192(1)
 ; ASM64PWR4-DAG:    li 3, 1
 ; ASM64PWR4-DAG:    li 4, 2
 ; ASM64PWR4-DAG:    li 5, 3
@@ -1909,8 +1918,8 @@ entry:
 ; CHECKASM-LABEL:  .caller_fpr_stack:
 
 ; ASM32PWR4:       mflr 0
-; ASM32PWR4-DAG:   stw 0, 8(1)
-; ASM32PWR4-DAG:   stwu 1, -144(1)
+; ASM32PWR4-NEXT:  stwu 1, -144(1)
+; ASM32PWR4-DAG:   stw 0, 152(1)
 ; ASM32PWR4-DAG:   lwz [[REGF1ADDR:[0-9]+]], L..C20(2)
 ; ASM32PWR4-DAG:   lwz [[REGF1:[0-9]+]], 0([[REGF1ADDR]])
 ; ASM32PWR4-DAG:   lwz [[REGDADDR:[0-9]+]], L..C19(2)
@@ -1941,8 +1950,8 @@ entry:
 ; ASM32PWR4-NEXT:  bl .test_fpr_stack
 
 ; ASM64PWR4:       mflr 0
-; ASM64PWR4-DAG:   std 0, 16(1)
-; ASM64PWR4-DAG:   stdu 1, -176(1)
+; ASM64PWR4-NEXT:  stdu 1, -176(1)
+; ASM64PWR4-DAG:   std 0, 192(1)
 ; ASM64PWR4-DAG:   ld [[REGF1ADDR:[0-9]+]], L..C18(2)
 ; ASM64PWR4-DAG:   lwz [[REGF1:[0-9]+]], 0([[REGF1ADDR]])
 ; ASM64PWR4-DAG:   ld [[REGDADDR:[0-9]+]], L..C19(2)
@@ -2290,8 +2299,8 @@ define void @caller_mix() {
 ; CHEKASM-LABEL:    .mix_floats_caller:
 
 ; ASM32PWR4:       mflr 0
-; ASM32PWR4-DAG:   stw 0, 8(1)
-; ASM32PWR4-DAG:   stwu 1, -176(1)
+; ASM32PWR4-NEXT:  stwu 1, -176(1)
+; ASM32PWR4-DAG:   stw 0, 184(1)
 ; ASM32PWR4-DAG:   stw [[REG:[0-9]+]], 56(1)
 ; ASM32PWR4-DAG:   stw [[REG:[0-9]+]], 60(1)
 ; ASM32PWR4-DAG:   stw [[REG:[0-9]+]], 64(1)
@@ -2323,8 +2332,8 @@ define void @caller_mix() {
 ; ASM32PWR4:       bl .mix_floats
 
 ; ASM64PWR4:      mflr 0
-; ASM64PWR4-DAG:  std 0, 16(1)
-; ASM64PWR4-DAG:  stdu 1, -240(1)
+; ASM64PWR4-NEXT: stdu 1, -240(1)
+; ASM64PWR4-DAG:  std 0, 256(1)
 ; ASM64PWR4-DAG:  std [[REG:[0-9]+]], 112(1)
 ; ASM64PWR4-DAG:  std [[REG:[0-9]+]], 120(1)
 ; ASM64PWR4-DAG:  std [[REG:[0-9]+]], 128(1)

diff  --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll
index 2809697e7a514..5135c6d93bb58 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mem.ll
@@ -107,6 +107,7 @@ entry:
 ; ASM32BIT-DAG:   addi 3, 1, 56
 ; ASM32BIT-DAG:   lwz 4, L..C{{[0-9]+}}(2)
 ; ASM32BIT-DAG:   li 5, 256
+; ASM32BIT-DAG:   stw 0, 328(1)
 ; ASM32BIT-NEXT:  bl .memcpy[PR]
 ; ASM32BIT:       bl .test_byval_mem2
 ; ASM32BIT:       addi 1, 1, 320
@@ -137,6 +138,7 @@ entry:
 ; ASM64BIT-DAG:   addi 3, 1, 112
 ; ASM64BIT-DAG:   ld 4, L..C{{[0-9]+}}(2)
 ; ASM64BIT-DAG:   li 5, 256
+; ASM64BIT-DAG:   std 0, 384(1)
 ; ASM64BIT-NEXT:  bl .memcpy[PR]
 ; ASM64BIT:       bl .test_byval_mem2
 ; ASM64BIT:       addi 1, 1, 368
@@ -319,8 +321,9 @@ entry:
 ; 32BIT-NEXT:     ADJCALLSTACKUP 316, 0, implicit-def dead $r1, implicit $r1
 
 ; ASM32BIT:       stwu 1, -320(1)
-; ASM32BIT-NEXT:  stw [[REG1:[0-9]+]], {{[0-9]+}}(1)
-; ASM32BIT:       lwz [[REG1]], L..C{{[0-9]+}}(2)
+; ASM32BIT-NEXT:  stw 0, 328(1)
+; ASM32BIT-DAG:   stw [[REG1:[0-9]+]], {{[0-9]+}}(1)
+; ASM32BIT-DAG:   lwz [[REG1]], L..C{{[0-9]+}}(2)
 ; ASM32BIT-DAG:   lhz [[REG2:[0-9]+]], 28([[REG1]])
 ; ASM32BIT-DAG:   sth [[REG2]], 56(1)
 ; ASM32BIT-DAG:   lbz [[REG3:[0-9]+]], 30([[REG1]])

diff  --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
index a9456d227e16e..e5a816feff441 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
@@ -40,6 +40,7 @@ entry:
 
 ; ASM32:       stwu 1, -64(1)
 ; ASM32-NEXT:  lwz [[REG:[0-9]+]], L..C{{[0-9]+}}(2)
+; ASM32-NEXT:  stw 0, 72(1)
 ; ASM32-NEXT:  lbz 3, 0([[REG]])
 ; ASM32-NEXT:  slwi 3, 3, 24
 ; ASM32-NEXT:  bl .test_byval_1Byte
@@ -53,9 +54,9 @@ entry:
 ; 64BIT-NEXT:  BL8_NOP <mcsymbol .test_byval_1Byte>, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
 ; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
 
-; ASM64:       std 0, 16(1)
-; ASM64-NEXT:  stdu 1, -128(1)
+; ASM64:       stdu 1, -128(1)
 ; ASM64-NEXT:  ld [[REG:[0-9]+]], L..C{{[0-9]+}}(2)
+; ASM64-NEXT:  std 0, 144(1)
 ; ASM64-NEXT:  lbz 3, 0([[REG]])
 ; ASM64-NEXT:  sldi 3, 3, 56
 ; ASM64-NEXT:  bl .test_byval_1Byte
@@ -167,8 +168,8 @@ entry:
 ; 64BIT-NEXT:  ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
 
 ; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
-; ASM64:       std 0, 16(1)
-; ASM64-NEXT:  stdu 1, -112(1)
+; ASM64:       stdu 1, -112(1)
+; ASM64-DAG:   std 0, 128(1)
 ; ASM64-DAG:   li 3, 42
 ; ASM64-DAG:   ld [[REG1:[0-9]+]], L..C{{[0-9]+}}(2)
 ; ASM64-DAG:   lfs 1, 0([[REG1]])
@@ -672,6 +673,7 @@ declare zeroext i8 @test_byval_8Byte(ptr byval(%struct.S8) align 1)
 
 ; ASM64:       stdu 1, -112(1)
 ; ASM64-NEXT:  ld [[REGADDR:[0-9]+]], L..C{{[0-9]+}}(2)
+; ASM64-NEXT:  std 0, 128(1)
 ; ASM64-NEXT:  ld 3, 0([[REGADDR]])
 ; ASM64-NEXT:  bl .test_byval_8Byte[PR]
 ; ASM64-NEXT:  nop

diff  --git a/llvm/test/CodeGen/PowerPC/aix-cc-ext-vec-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-ext-vec-abi.ll
index 29c7eed248e8c..279066fb06d87 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-ext-vec-abi.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-ext-vec-abi.ll
@@ -47,7 +47,6 @@ entry:
 ; ASM-LABEL:     .vec_caller:
 ; ASM32:         # %bb.0:                                # %entry
 ; ASM32-DAG:     mflr 0
-; ASM32-DAG:     stw 0, 8(1)
 ; ASM32-DAG:     stwu 1, -64(1)
 ; ASM32-DAG:     lwz [[REG1:[0-9]+]], L..C0(2)
 ; ASM32-DAG:     lxvw4x 34, 0, [[REG1]]
@@ -73,6 +72,7 @@ entry:
 ; ASM32-DAG:     lxvw4x 44, 0, [[REG11]]
 ; ASM32-DAG:     lwz [[REG12:[0-9]+]], L..C11(2)
 ; ASM32-DAG:     lxvw4x 45, 0, [[REG12]]
+; ASM32-DAG:     stw 0, 72(1)
 ; ASM32-DAG:     bl .vec_callee
 ; ASM32-DAG:     li 3, 0
 ; ASM32-DAG:     addi 1, 1, 64
@@ -81,7 +81,6 @@ entry:
 ; ASM32:         blr
 
 ; ASM64:         # %entry
-; ASM64-DAG:     std 0, 16(1)
 ; ASM64-DAG:     stdu 1, -112(1)
 ; ASM64-DAG:     ld [[REG1:[0-9]+]], L..C0(2)
 ; ASM64-DAG:     lxvw4x 34, 0, [[REG1]]
@@ -107,6 +106,7 @@ entry:
 ; ASM64-DAG:     lxvw4x 44, 0, [[REG11]]
 ; ASM64-DAG:     ld [[REG12:[0-9]+]], L..C11(2)
 ; ASM64-DAG:     lxvw4x 45, 0, [[REG12]]
+; ASM64-DAG:     std 0, 128(1)
 ; ASM64-DAG:     bl .vec_callee
 ; ASM64-DAG:     li 3, 0
 ; ASM64-DAG:     addi 1, 1, 112

diff  --git a/llvm/test/CodeGen/PowerPC/aix-crspill.ll b/llvm/test/CodeGen/PowerPC/aix-crspill.ll
index af41620e75075..9487c482e9c28 100644
--- a/llvm/test/CodeGen/PowerPC/aix-crspill.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-crspill.ll
@@ -22,11 +22,11 @@ declare signext i32 @do_something(i32 signext)
 
 ; 64BIT-LABEL: .killOne:
 
-; 64BIT:       mflr 0
-; 64BIT-NEXT:  std 0, 16(1)
-; 64BIT-NEXT:  mfcr 12
+; 64BIT:       mfcr 12
 ; 64BIT-NEXT:  stw 12, 8(1)
-; 64BIT:       stdu 1, -112(1)
+; 64BIT-NEXT:  mflr 0
+; 64BIT-NEXT:  stdu 1, -112(1)
+; 64BIT:       std 0, 128(1)
 
 ; 64BIT:       # Clobber CR
 ; 64BIT:       bl .do_something
@@ -40,11 +40,11 @@ declare signext i32 @do_something(i32 signext)
 
 ; 32BIT-LABEL: .killOne:
 
-; 32BIT:       mflr 0
-; 32BIT-NEXT:  stw 0, 8(1)
-; 32BIT-NEXT:  mfcr 12
+; 32BIT:       mfcr 12
 ; 32BIT-NEXT:  stw 12, 4(1)
-; 32BIT:       stwu 1, -64(1)
+; 32BIT-NEXT:  mflr 0
+; 32BIT-NEXT:  stwu 1, -64(1)
+; 32BIT:       stw 0, 72(1)
 
 ; 32BIT:       # Clobber CR
 ; 32BIT:       bl .do_something

diff  --git a/llvm/test/CodeGen/PowerPC/aix-csr.ll b/llvm/test/CodeGen/PowerPC/aix-csr.ll
index ec8ece74c50eb..a9a85c8be5a10 100644
--- a/llvm/test/CodeGen/PowerPC/aix-csr.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-csr.ll
@@ -159,8 +159,8 @@ define dso_local double @fprs_and_gprs(i32 signext %i) {
 ; MIR64: liveins: $x3, $x14, $x25, $x31, $f14, $f19, $f21, $f31
 
 ; MIR64:       $x0 = MFLR8 implicit $lr8
-; MIR64-NEXT:  STD killed $x0, 16, $x1
 ; MIR64-NEXT:  $x1 = STDU $x1, -400, $x1
+; MIR64-NEXT:  STD killed $x0, 416, $x1
 ; MIR64-DAG:   STD killed $x14, 112, $x1 :: (store (s64) into %fixed-stack.6, align 16)
 ; MIR64-DAG:   STD killed $x25, 200, $x1 :: (store (s64) into %fixed-stack.5)
 ; MIR64-DAG:   STD killed $x31, 248, $x1 :: (store (s64) into %fixed-stack.4)
@@ -188,8 +188,8 @@ define dso_local double @fprs_and_gprs(i32 signext %i) {
 ; MIR32: liveins: $r3, $r13, $r14, $r25, $r31, $f14, $f19, $f21, $f31
 
 ; MIR32:      $r0 = MFLR implicit $lr
-; MIR32-NEXT: STW killed $r0, 8, $r1
 ; MIR32-NEXT: $r1 = STWU $r1, -288, $r1
+; MIR32-NEXT: STW killed $r0, 296, $r1
 ; MIR32-DAG:  STW killed $r13, 68, $r1 :: (store (s32) into %fixed-stack.7)
 ; MIR32-DAG:  STW killed $r14, 72, $r1 :: (store (s32) into %fixed-stack.6, align 8)
 ; MIR32-DAG:  STW killed $r25, 116, $r1 :: (store (s32) into %fixed-stack.5)
@@ -217,8 +217,8 @@ define dso_local double @fprs_and_gprs(i32 signext %i) {
 
 ; ASM64-LABEL: .fprs_and_gprs:
 ; ASM64:         mflr 0
-; ASM64-NEXT:    std 0, 16(1)
 ; ASM64-NEXT:    stdu 1, -400(1)
+; ASM64-NEXT:    std 0, 416(1)
 ; ASM64-DAG:     std 14, 112(1)                  # 8-byte Folded Spill
 ; ASM64-DAG:     std 25, 200(1)                  # 8-byte Folded Spill
 ; ASM64-DAG:     std 31, 248(1)                  # 8-byte Folded Spill
@@ -243,8 +243,8 @@ define dso_local double @fprs_and_gprs(i32 signext %i) {
 
 ; ASM32-LABEL: .fprs_and_gprs:
 ; ASM32:         mflr 0
-; ASM32-NEXT:    stw 0, 8(1)
 ; ASM32-NEXT:    stwu 1, -288(1)
+; ASM32-NEXT:    stw 0, 296(1)
 ; ASM32-DAG:     stw 13, 68(1)                   # 4-byte Folded Spill
 ; ASM32-DAG:     stw 14, 72(1)                   # 4-byte Folded Spill
 ; ASM32-DAG:     stw 25, 116(1)                  # 4-byte Folded Spill

diff  --git a/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable.ll b/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable.ll
index 0223f531d4f20..cbcd51f9dd9b4 100644
--- a/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-emit-tracebacktable.ll
@@ -160,7 +160,7 @@ entry:
 ; CHECK-ASM-LABEL:     .main:{{[[:space:]] *}}# %bb.0:
 ; CHECK-FUNC-LABEL:    .csect .main[PR],5{{[[:space:]] *}}# %bb.0
 ; COMMON-NEXT:   mflr 0
-; COMMON-NEXT:   stw 0, 8(1)
+; COMMON:        stw 0, 168(1)
 ; COMMON:        mtlr 0
 ; COMMON-NEXT:   blr
 ; COMMON-NEXT: L..main0:

diff  --git a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
index d8e225169f115..44281bcc3647d 100644
--- a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll
@@ -14,14 +14,14 @@ define dso_local float @frameptr_only(i32 %n, float %f) {
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    mflr 0
 ; AIX32-NEXT:    stw 31, -12(1)
-; AIX32-NEXT:    stw 0, 8(1)
 ; AIX32-NEXT:    stwu 1, -80(1)
 ; AIX32-NEXT:    slwi 3, 3, 2
 ; AIX32-NEXT:    mr 31, 1
-; AIX32-NEXT:    stfd 31, 72(31) # 8-byte Folded Spill
-; AIX32-NEXT:    fmr 31, 1
+; AIX32-NEXT:    stw 0, 88(1)
 ; AIX32-NEXT:    addi 3, 3, 15
 ; AIX32-NEXT:    addi 4, 31, 80
+; AIX32-NEXT:    stfd 31, 72(31) # 8-byte Folded Spill
+; AIX32-NEXT:    fmr 31, 1
 ; AIX32-NEXT:    rlwinm 3, 3, 0, 0, 27
 ; AIX32-NEXT:    neg 3, 3
 ; AIX32-NEXT:    stwux 4, 1, 3
@@ -40,14 +40,14 @@ define dso_local float @frameptr_only(i32 %n, float %f) {
 ; AIX64:       # %bb.0: # %entry
 ; AIX64-NEXT:    mflr 0
 ; AIX64-NEXT:    std 31, -16(1)
-; AIX64-NEXT:    std 0, 16(1)
 ; AIX64-NEXT:    stdu 1, -144(1)
 ; AIX64-NEXT:    rldic 3, 3, 2, 30
 ; AIX64-NEXT:    mr 31, 1
-; AIX64-NEXT:    stfd 31, 136(31) # 8-byte Folded Spill
-; AIX64-NEXT:    fmr 31, 1
+; AIX64-NEXT:    std 0, 160(1)
 ; AIX64-NEXT:    addi 3, 3, 15
 ; AIX64-NEXT:    addi 4, 31, 144
+; AIX64-NEXT:    stfd 31, 136(31) # 8-byte Folded Spill
+; AIX64-NEXT:    fmr 31, 1
 ; AIX64-NEXT:    rldicl 3, 3, 60, 4
 ; AIX64-NEXT:    rldicl 3, 3, 4, 29
 ; AIX64-NEXT:    neg 3, 3

diff  --git a/llvm/test/CodeGen/PowerPC/aix-llvm-intrinsic.ll b/llvm/test/CodeGen/PowerPC/aix-llvm-intrinsic.ll
index ecb20aa704764..920c0a4b5ba31 100644
--- a/llvm/test/CodeGen/PowerPC/aix-llvm-intrinsic.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-llvm-intrinsic.ll
@@ -72,14 +72,14 @@ declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg)
 ; CHECKRELOC32:      00000000 (idx: 7) .bar:
 ; CHECKRELOC64:      0000000000000000 (idx: 7) .bar:
 ; CHECKRELOC-NEXT:        0: 7c 08 02 a6                        mflr 0
-; CHECKRELOC32-NEXT:        4: 90 01 00 08                      stw 0, 8(1)
-; CHECKRELOC32-NEXT:        8: 94 21 ff c0                      stwu 1, -64(1)
-; CHECKRELOC32-NEXT:        c: 80 62 00 00                      lwz 3, 0(2)
-; CHECKRELOC64-NEXT:        4: f8 01 00 10                      std 0, 16(1)
-; CHECKRELOC64-NEXT:        8: f8 21 ff 91                      stdu 1, -112(1)
-; CHECKRELOC64-NEXT:        c: e8 62 00 00                      ld 3, 0(2)
-; CHECKRELOC32-NEXT:    0000000e:  R_TOC        (idx: 13) s[TC]
-; CHECKRELOC64-NEXT:    000000000000000e:  R_TOC	(idx: 13) s[TC]
+; CHECKRELOC32-NEXT:        4: 94 21 ff c0                      stwu 1, -64(1)
+; CHECKRELOC32-NEXT:        8: 80 62 00 00                      lwz 3, 0(2)
+; CHECKRELOC32-NEXT:    0000000a:  R_TOC        (idx: 13) s[TC]
+; CHECKRELOC32-NEXT:        c: 90 01 00 48                      stw 0, 72(1)
+; CHECKRELOC64-NEXT:        4: f8 21 ff 91                      stdu 1, -112(1)
+; CHECKRELOC64-NEXT:        8: e8 62 00 00                      ld 3, 0(2)
+; CHECKRELOC64-NEXT:    000000000000000a:  R_TOC	(idx: 13) s[TC]
+; CHECKRELOC64-NEXT:        c: f8 01 00 80                      std 0, 128(1)
 ; CHECKRELOC-NEXT:       10: 80 83 00 04                        lwz 4, 4(3)
 ; CHECKRELOC-NEXT:       14: 7c 85 23 78                        mr 5, 4
 ; CHECKRELOC-NEXT:       18: 4b ff ff e9                        bl 0x0

diff  --git a/llvm/test/CodeGen/PowerPC/aix-lr.ll b/llvm/test/CodeGen/PowerPC/aix-lr.ll
index 9383eab1e003d..d9c76ac67ebae 100644
--- a/llvm/test/CodeGen/PowerPC/aix-lr.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-lr.ll
@@ -9,8 +9,8 @@ define void @bar() {
 ; 32BIT-LABEL: bar:
 ; 32BIT:       # %bb.0: # %entry
 ; 32BIT-NEXT:    mflr 0
-; 32BIT-NEXT:    stw 0, 8(1)
 ; 32BIT-NEXT:    stwu 1, -64(1)
+; 32BIT-NEXT:    stw 0, 72(1)
 ; 32BIT-NEXT:    bl .foo[PR]
 ; 32BIT-NEXT:    nop
 ; 32BIT-NEXT:    addi 1, 1, 64
@@ -21,8 +21,8 @@ define void @bar() {
 ; 64BIT-LABEL: bar:
 ; 64BIT:       # %bb.0: # %entry
 ; 64BIT-NEXT:    mflr 0
-; 64BIT-NEXT:    std 0, 16(1)
 ; 64BIT-NEXT:    stdu 1, -112(1)
+; 64BIT-NEXT:    std 0, 128(1)
 ; 64BIT-NEXT:    bl .foo[PR]
 ; 64BIT-NEXT:    nop
 ; 64BIT-NEXT:    addi 1, 1, 112

diff  --git a/llvm/test/CodeGen/PowerPC/aix-sret-param.ll b/llvm/test/CodeGen/PowerPC/aix-sret-param.ll
index 0fed04dfca0b7..3c40fc3c3e881 100644
--- a/llvm/test/CodeGen/PowerPC/aix-sret-param.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-sret-param.ll
@@ -53,12 +53,14 @@ declare void @bar(ptr sret(%struct.T))
 
 ; ASM32:       stwu 1, -64(1)
 ; ASM32-NEXT:  addi 3, 1, 56
+; ASM32-NEXT:  stw 0, 72(1)
 ; ASM32-NEXT:  bl .foo[PR]
 ; ASM32-NEXT:  nop
 ; ASM32-NEXT:  addi 1, 1, 64
 
 ; ASM64:       stdu 1, -128(1)
 ; ASM64-NEXT:  addi 3, 1, 120
+; ASM64-NEXT:  std 0, 144(1)
 ; ASM64-NEXT:  bl .foo[PR]
 ; ASM64-NEXT:  nop
 ; ASM64-NEXT:  addi 1, 1, 128
@@ -86,6 +88,7 @@ declare void @bar(ptr sret(%struct.T))
 
 ; ASM32:        stwu 1, -80(1)
 ; ASM32-NEXT:   addi 3, 1, 56
+; ASM32-NEXT:   stw 0, 88(1)
 ; ASM32-NEXT:   bl .bar[PR]
 ; ASM32-NEXT:   nop
 ; ASM32-NEXT:   addi 1, 1, 80
@@ -93,6 +96,7 @@ declare void @bar(ptr sret(%struct.T))
 
 ; ASM64:        stdu 1, -144(1)
 ; ASM64-NEXT:   addi 3, 1, 120
+; ASM64-NEXT:   std 0, 160(1)
 ; ASM64-NEXT:   bl .bar[PR]
 ; ASM64-NEXT:   nop
 ; ASM64-NEXT:   addi 1, 1, 144

diff  --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
index 190d57a07b3cf..46ce3bfd450bd 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec \
 ; RUN:      -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \
 ; RUN:      --check-prefix=SMALL32
@@ -22,10 +23,10 @@ define void @storesTGUninit(double %Val) #0 {
 ; SMALL32-LABEL: storesTGUninit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C0(2)
-; SMALL32-NEXT:    lwz 4, L..C1(2)
+; SMALL32-NEXT:    lwz 3, L..C0(2) # target-flags(ppc-lo) @TGUninit
+; SMALL32-NEXT:    lwz 4, L..C1(2) # target-flags(ppc-tlsgd) @TGUninit
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    stfd 1, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -36,8 +37,8 @@ define void @storesTGUninit(double %Val) #0 {
 ; LARGE32-LABEL: storesTGUninit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C0@u(2)
 ; LARGE32-NEXT:    addis 4, L..C1@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C0@l(3)
@@ -52,10 +53,10 @@ define void @storesTGUninit(double %Val) #0 {
 ; SMALL64-LABEL: storesTGUninit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C0(2)
-; SMALL64-NEXT:    ld 4, L..C1(2)
+; SMALL64-NEXT:    ld 3, L..C0(2) # target-flags(ppc-lo) @TGUninit
+; SMALL64-NEXT:    ld 4, L..C1(2) # target-flags(ppc-tlsgd) @TGUninit
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    stfd 1, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
@@ -66,10 +67,10 @@ define void @storesTGUninit(double %Val) #0 {
 ; LARGE64-LABEL: storesTGUninit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C0@u(2)
 ; LARGE64-NEXT:    addis 4, L..C1@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C0@l(3)
 ; LARGE64-NEXT:    ld 4, L..C1@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -88,10 +89,10 @@ define void @storesTGInit(double %Val) #0 {
 ; SMALL32-LABEL: storesTGInit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C2(2)
-; SMALL32-NEXT:    lwz 4, L..C3(2)
+; SMALL32-NEXT:    lwz 3, L..C2(2) # target-flags(ppc-lo) @TGInit
+; SMALL32-NEXT:    lwz 4, L..C3(2) # target-flags(ppc-tlsgd) @TGInit
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    stfd 1, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -102,8 +103,8 @@ define void @storesTGInit(double %Val) #0 {
 ; LARGE32-LABEL: storesTGInit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C2@u(2)
 ; LARGE32-NEXT:    addis 4, L..C3@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C2@l(3)
@@ -118,10 +119,10 @@ define void @storesTGInit(double %Val) #0 {
 ; SMALL64-LABEL: storesTGInit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C2(2)
-; SMALL64-NEXT:    ld 4, L..C3(2)
+; SMALL64-NEXT:    ld 3, L..C2(2) # target-flags(ppc-lo) @TGInit
+; SMALL64-NEXT:    ld 4, L..C3(2) # target-flags(ppc-tlsgd) @TGInit
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    stfd 1, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
@@ -132,10 +133,10 @@ define void @storesTGInit(double %Val) #0 {
 ; LARGE64-LABEL: storesTGInit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C2@u(2)
 ; LARGE64-NEXT:    addis 4, L..C3@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    ld 4, L..C3@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -154,10 +155,10 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL32-LABEL: storesTIInit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-lo) @TIInit
+; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    stfd 1, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -168,8 +169,8 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE32-LABEL: storesTIInit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4@u(2)
 ; LARGE32-NEXT:    addis 4, L..C5@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C4@l(3)
@@ -184,10 +185,10 @@ define void @storesTIInit(double %Val) #0 {
 ; SMALL64-LABEL: storesTIInit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-lo) @TIInit
+; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    stfd 1, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
@@ -198,10 +199,10 @@ define void @storesTIInit(double %Val) #0 {
 ; LARGE64-LABEL: storesTIInit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
 ; LARGE64-NEXT:    addis 4, L..C5@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C4@l(3)
 ; LARGE64-NEXT:    ld 4, L..C5@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -220,10 +221,10 @@ define void @storesTWInit(double %Val) #0 {
 ; SMALL32-LABEL: storesTWInit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C6(2)
-; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    lwz 3, L..C6(2) # target-flags(ppc-lo) @TWInit
+; SMALL32-NEXT:    lwz 4, L..C7(2) # target-flags(ppc-tlsgd) @TWInit
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    stfd 1, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -234,8 +235,8 @@ define void @storesTWInit(double %Val) #0 {
 ; LARGE32-LABEL: storesTWInit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C6@u(2)
 ; LARGE32-NEXT:    addis 4, L..C7@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C6@l(3)
@@ -250,10 +251,10 @@ define void @storesTWInit(double %Val) #0 {
 ; SMALL64-LABEL: storesTWInit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C6(2)
-; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 3, L..C6(2) # target-flags(ppc-lo) @TWInit
+; SMALL64-NEXT:    ld 4, L..C7(2) # target-flags(ppc-tlsgd) @TWInit
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    stfd 1, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
@@ -264,10 +265,10 @@ define void @storesTWInit(double %Val) #0 {
 ; LARGE64-LABEL: storesTWInit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C6@u(2)
 ; LARGE64-NEXT:    addis 4, L..C7@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C6@l(3)
 ; LARGE64-NEXT:    ld 4, L..C7@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -286,12 +287,12 @@ define double @loadsTGUninit() #1 {
 ; SMALL32-LABEL: loadsTGUninit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C0(2)
-; SMALL32-NEXT:    lwz 4, L..C1(2)
+; SMALL32-NEXT:    lwz 3, L..C0(2) # target-flags(ppc-lo) @TGUninit
+; SMALL32-NEXT:    lwz 4, L..C1(2) # target-flags(ppc-tlsgd) @TGUninit
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 4, L..C8(2) # @GInit
 ; SMALL32-NEXT:    lfd 0, 0(3)
 ; SMALL32-NEXT:    lfd 1, 0(4)
 ; SMALL32-NEXT:    fadd 1, 0, 1
@@ -303,8 +304,8 @@ define double @loadsTGUninit() #1 {
 ; LARGE32-LABEL: loadsTGUninit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C0@u(2)
 ; LARGE32-NEXT:    addis 4, L..C1@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C0@l(3)
@@ -323,12 +324,12 @@ define double @loadsTGUninit() #1 {
 ; SMALL64-LABEL: loadsTGUninit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C0(2)
-; SMALL64-NEXT:    ld 4, L..C1(2)
+; SMALL64-NEXT:    ld 3, L..C0(2) # target-flags(ppc-lo) @TGUninit
+; SMALL64-NEXT:    ld 4, L..C1(2) # target-flags(ppc-tlsgd) @TGUninit
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 4, L..C8(2) # @GInit
 ; SMALL64-NEXT:    lfd 0, 0(3)
 ; SMALL64-NEXT:    lfd 1, 0(4)
 ; SMALL64-NEXT:    fadd 1, 0, 1
@@ -340,10 +341,10 @@ define double @loadsTGUninit() #1 {
 ; LARGE64-LABEL: loadsTGUninit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C0@u(2)
 ; LARGE64-NEXT:    addis 4, L..C1@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C0@l(3)
 ; LARGE64-NEXT:    ld 4, L..C1@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -368,12 +369,12 @@ define double @loadsTGInit() #1 {
 ; SMALL32-LABEL: loadsTGInit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C2(2)
-; SMALL32-NEXT:    lwz 4, L..C3(2)
+; SMALL32-NEXT:    lwz 3, L..C2(2) # target-flags(ppc-lo) @TGInit
+; SMALL32-NEXT:    lwz 4, L..C3(2) # target-flags(ppc-tlsgd) @TGInit
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 4, L..C8(2) # @GInit
 ; SMALL32-NEXT:    lfd 0, 0(3)
 ; SMALL32-NEXT:    lfd 1, 0(4)
 ; SMALL32-NEXT:    fadd 1, 0, 1
@@ -385,8 +386,8 @@ define double @loadsTGInit() #1 {
 ; LARGE32-LABEL: loadsTGInit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C2@u(2)
 ; LARGE32-NEXT:    addis 4, L..C3@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C2@l(3)
@@ -405,12 +406,12 @@ define double @loadsTGInit() #1 {
 ; SMALL64-LABEL: loadsTGInit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C2(2)
-; SMALL64-NEXT:    ld 4, L..C3(2)
+; SMALL64-NEXT:    ld 3, L..C2(2) # target-flags(ppc-lo) @TGInit
+; SMALL64-NEXT:    ld 4, L..C3(2) # target-flags(ppc-tlsgd) @TGInit
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 4, L..C8(2) # @GInit
 ; SMALL64-NEXT:    lfd 0, 0(3)
 ; SMALL64-NEXT:    lfd 1, 0(4)
 ; SMALL64-NEXT:    fadd 1, 0, 1
@@ -422,10 +423,10 @@ define double @loadsTGInit() #1 {
 ; LARGE64-LABEL: loadsTGInit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C2@u(2)
 ; LARGE64-NEXT:    addis 4, L..C3@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    ld 4, L..C3@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -450,12 +451,12 @@ define double @loadsTIInit() #1 {
 ; SMALL32-LABEL: loadsTIInit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C4(2)
-; SMALL32-NEXT:    lwz 4, L..C5(2)
+; SMALL32-NEXT:    lwz 3, L..C4(2) # target-flags(ppc-lo) @TIInit
+; SMALL32-NEXT:    lwz 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 4, L..C8(2) # @GInit
 ; SMALL32-NEXT:    lfd 0, 0(3)
 ; SMALL32-NEXT:    lfd 1, 0(4)
 ; SMALL32-NEXT:    fadd 1, 0, 1
@@ -467,8 +468,8 @@ define double @loadsTIInit() #1 {
 ; LARGE32-LABEL: loadsTIInit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4@u(2)
 ; LARGE32-NEXT:    addis 4, L..C5@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C4@l(3)
@@ -487,12 +488,12 @@ define double @loadsTIInit() #1 {
 ; SMALL64-LABEL: loadsTIInit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C4(2)
-; SMALL64-NEXT:    ld 4, L..C5(2)
+; SMALL64-NEXT:    ld 3, L..C4(2) # target-flags(ppc-lo) @TIInit
+; SMALL64-NEXT:    ld 4, L..C5(2) # target-flags(ppc-tlsgd) @TIInit
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 4, L..C8(2) # @GInit
 ; SMALL64-NEXT:    lfd 0, 0(3)
 ; SMALL64-NEXT:    lfd 1, 0(4)
 ; SMALL64-NEXT:    fadd 1, 0, 1
@@ -504,10 +505,10 @@ define double @loadsTIInit() #1 {
 ; LARGE64-LABEL: loadsTIInit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
 ; LARGE64-NEXT:    addis 4, L..C5@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C4@l(3)
 ; LARGE64-NEXT:    ld 4, L..C5@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -532,12 +533,12 @@ define double @loadsTWInit() #1 {
 ; SMALL32-LABEL: loadsTWInit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
-; SMALL32-NEXT:    lwz 3, L..C6(2)
-; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    lwz 3, L..C6(2) # target-flags(ppc-lo) @TWInit
+; SMALL32-NEXT:    lwz 4, L..C7(2) # target-flags(ppc-tlsgd) @TWInit
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
-; SMALL32-NEXT:    lwz 4, L..C8(2)
+; SMALL32-NEXT:    lwz 4, L..C8(2) # @GInit
 ; SMALL32-NEXT:    lfd 0, 0(3)
 ; SMALL32-NEXT:    lfd 1, 0(4)
 ; SMALL32-NEXT:    fadd 1, 0, 1
@@ -549,8 +550,8 @@ define double @loadsTWInit() #1 {
 ; LARGE32-LABEL: loadsTWInit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C6@u(2)
 ; LARGE32-NEXT:    addis 4, L..C7@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C6@l(3)
@@ -569,12 +570,12 @@ define double @loadsTWInit() #1 {
 ; SMALL64-LABEL: loadsTWInit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
-; SMALL64-NEXT:    ld 3, L..C6(2)
-; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    ld 3, L..C6(2) # target-flags(ppc-lo) @TWInit
+; SMALL64-NEXT:    ld 4, L..C7(2) # target-flags(ppc-tlsgd) @TWInit
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
-; SMALL64-NEXT:    ld 4, L..C8(2)
+; SMALL64-NEXT:    ld 4, L..C8(2) # @GInit
 ; SMALL64-NEXT:    lfd 0, 0(3)
 ; SMALL64-NEXT:    lfd 1, 0(4)
 ; SMALL64-NEXT:    fadd 1, 0, 1
@@ -586,10 +587,10 @@ define double @loadsTWInit() #1 {
 ; LARGE64-LABEL: loadsTWInit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C6@u(2)
 ; LARGE64-NEXT:    addis 4, L..C7@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C6@l(3)
 ; LARGE64-NEXT:    ld 4, L..C7@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]

diff  --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
index 34ebe758343e5..887c4521a4c90 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll
@@ -22,11 +22,11 @@ define void @storesTGUninit(i32 %Val) #0 {
 ; SMALL32-LABEL: storesTGUninit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    mr 6, 3
 ; SMALL32-NEXT:    lwz 3, L..C0(2)
 ; SMALL32-NEXT:    lwz 4, L..C1(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    stw 6, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -37,8 +37,8 @@ define void @storesTGUninit(i32 %Val) #0 {
 ; LARGE32-LABEL: storesTGUninit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 3
 ; LARGE32-NEXT:    addis 3, L..C0@u(2)
 ; LARGE32-NEXT:    addis 4, L..C1@u(2)
@@ -54,11 +54,11 @@ define void @storesTGUninit(i32 %Val) #0 {
 ; SMALL64-LABEL: storesTGUninit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C0(2)
 ; SMALL64-NEXT:    ld 4, L..C1(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    stw 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
@@ -69,11 +69,11 @@ define void @storesTGUninit(i32 %Val) #0 {
 ; LARGE64-LABEL: storesTGUninit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C0@u(2)
 ; LARGE64-NEXT:    addis 4, L..C1@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C0@l(3)
 ; LARGE64-NEXT:    ld 4, L..C1@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -92,11 +92,11 @@ define void @storesTGInit(i32 %Val) #0 {
 ; SMALL32-LABEL: storesTGInit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    mr 6, 3
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    lwz 4, L..C3(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    stw 6, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -107,8 +107,8 @@ define void @storesTGInit(i32 %Val) #0 {
 ; LARGE32-LABEL: storesTGInit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 3
 ; LARGE32-NEXT:    addis 3, L..C2@u(2)
 ; LARGE32-NEXT:    addis 4, L..C3@u(2)
@@ -124,11 +124,11 @@ define void @storesTGInit(i32 %Val) #0 {
 ; SMALL64-LABEL: storesTGInit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    ld 4, L..C3(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    stw 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
@@ -139,11 +139,11 @@ define void @storesTGInit(i32 %Val) #0 {
 ; LARGE64-LABEL: storesTGInit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C2@u(2)
 ; LARGE64-NEXT:    addis 4, L..C3@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    ld 4, L..C3@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -162,11 +162,11 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL32-LABEL: storesTIUninit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    mr 6, 3
 ; SMALL32-NEXT:    lwz 3, L..C4(2)
 ; SMALL32-NEXT:    lwz 4, L..C5(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    stw 6, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -177,8 +177,8 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE32-LABEL: storesTIUninit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 3
 ; LARGE32-NEXT:    addis 3, L..C4@u(2)
 ; LARGE32-NEXT:    addis 4, L..C5@u(2)
@@ -194,11 +194,11 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; SMALL64-LABEL: storesTIUninit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C4(2)
 ; SMALL64-NEXT:    ld 4, L..C5(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    stw 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
@@ -209,11 +209,11 @@ define void @storesTIUninit(i32 %Val) #0 {
 ; LARGE64-LABEL: storesTIUninit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
 ; LARGE64-NEXT:    addis 4, L..C5@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C4@l(3)
 ; LARGE64-NEXT:    ld 4, L..C5@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -232,11 +232,11 @@ define void @storesTWUninit(i32 %Val) #0 {
 ; SMALL32-LABEL: storesTWUninit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    mr 6, 3
 ; SMALL32-NEXT:    lwz 3, L..C6(2)
 ; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    stw 6, 0(3)
 ; SMALL32-NEXT:    addi 1, 1, 32
@@ -247,8 +247,8 @@ define void @storesTWUninit(i32 %Val) #0 {
 ; LARGE32-LABEL: storesTWUninit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 3
 ; LARGE32-NEXT:    addis 3, L..C6@u(2)
 ; LARGE32-NEXT:    addis 4, L..C7@u(2)
@@ -264,11 +264,11 @@ define void @storesTWUninit(i32 %Val) #0 {
 ; SMALL64-LABEL: storesTWUninit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C6(2)
 ; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    stw 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
@@ -279,11 +279,11 @@ define void @storesTWUninit(i32 %Val) #0 {
 ; LARGE64-LABEL: storesTWUninit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C6@u(2)
 ; LARGE64-NEXT:    addis 4, L..C7@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C6@l(3)
 ; LARGE64-NEXT:    ld 4, L..C7@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -302,10 +302,10 @@ define i32 @loadsTGUninit() #1 {
 ; SMALL32-LABEL: loadsTGUninit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    lwz 3, L..C0(2)
 ; SMALL32-NEXT:    lwz 4, L..C1(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    lwz 4, L..C8(2)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -319,8 +319,8 @@ define i32 @loadsTGUninit() #1 {
 ; LARGE32-LABEL: loadsTGUninit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C0@u(2)
 ; LARGE32-NEXT:    addis 4, L..C1@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C0@l(3)
@@ -339,10 +339,10 @@ define i32 @loadsTGUninit() #1 {
 ; SMALL64-LABEL: loadsTGUninit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    ld 3, L..C0(2)
 ; SMALL64-NEXT:    ld 4, L..C1(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    ld 4, L..C8(2)
 ; SMALL64-NEXT:    lwz 3, 0(3)
@@ -356,10 +356,10 @@ define i32 @loadsTGUninit() #1 {
 ; LARGE64-LABEL: loadsTGUninit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C0@u(2)
 ; LARGE64-NEXT:    addis 4, L..C1@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C0@l(3)
 ; LARGE64-NEXT:    ld 4, L..C1@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -384,10 +384,10 @@ define i32 @loadsTGInit() #1 {
 ; SMALL32-LABEL: loadsTGInit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    lwz 4, L..C3(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    lwz 4, L..C8(2)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -401,8 +401,8 @@ define i32 @loadsTGInit() #1 {
 ; LARGE32-LABEL: loadsTGInit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C2@u(2)
 ; LARGE32-NEXT:    addis 4, L..C3@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C2@l(3)
@@ -421,10 +421,10 @@ define i32 @loadsTGInit() #1 {
 ; SMALL64-LABEL: loadsTGInit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    ld 4, L..C3(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    ld 4, L..C8(2)
 ; SMALL64-NEXT:    lwz 3, 0(3)
@@ -438,10 +438,10 @@ define i32 @loadsTGInit() #1 {
 ; LARGE64-LABEL: loadsTGInit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C2@u(2)
 ; LARGE64-NEXT:    addis 4, L..C3@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    ld 4, L..C3@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -466,10 +466,10 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL32-LABEL: loadsTIUninit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    lwz 3, L..C4(2)
 ; SMALL32-NEXT:    lwz 4, L..C5(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    lwz 4, L..C8(2)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -483,8 +483,8 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE32-LABEL: loadsTIUninit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4@u(2)
 ; LARGE32-NEXT:    addis 4, L..C5@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C4@l(3)
@@ -503,10 +503,10 @@ define i32 @loadsTIUninit() #1 {
 ; SMALL64-LABEL: loadsTIUninit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    ld 3, L..C4(2)
 ; SMALL64-NEXT:    ld 4, L..C5(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    ld 4, L..C8(2)
 ; SMALL64-NEXT:    lwz 3, 0(3)
@@ -520,10 +520,10 @@ define i32 @loadsTIUninit() #1 {
 ; LARGE64-LABEL: loadsTIUninit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
 ; LARGE64-NEXT:    addis 4, L..C5@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C4@l(3)
 ; LARGE64-NEXT:    ld 4, L..C5@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -548,10 +548,10 @@ define i32 @loadsTWUninit() #1 {
 ; SMALL32-LABEL: loadsTWUninit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    lwz 3, L..C6(2)
 ; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    lwz 4, L..C8(2)
 ; SMALL32-NEXT:    lwz 3, 0(3)
@@ -565,8 +565,8 @@ define i32 @loadsTWUninit() #1 {
 ; LARGE32-LABEL: loadsTWUninit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C6@u(2)
 ; LARGE32-NEXT:    addis 4, L..C7@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C6@l(3)
@@ -585,10 +585,10 @@ define i32 @loadsTWUninit() #1 {
 ; SMALL64-LABEL: loadsTWUninit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    ld 3, L..C6(2)
 ; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    ld 4, L..C8(2)
 ; SMALL64-NEXT:    lwz 3, 0(3)
@@ -602,10 +602,10 @@ define i32 @loadsTWUninit() #1 {
 ; LARGE64-LABEL: loadsTWUninit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C6@u(2)
 ; LARGE64-NEXT:    addis 4, L..C7@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C6@l(3)
 ; LARGE64-NEXT:    ld 4, L..C7@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]

diff  --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
index 34e83221c4452..47813b59ba804 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll
@@ -22,12 +22,12 @@ define void @storesTGInit(i64 %Val) #0 {
 ; SMALL32-LABEL: storesTGInit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    mr 7, 3
 ; SMALL32-NEXT:    lwz 3, L..C0(2)
 ; SMALL32-NEXT:    lwz 4, L..C1(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
@@ -39,8 +39,8 @@ define void @storesTGInit(i64 %Val) #0 {
 ; LARGE32-LABEL: storesTGInit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    mr 7, 3
 ; LARGE32-NEXT:    addis 3, L..C0@u(2)
@@ -58,11 +58,11 @@ define void @storesTGInit(i64 %Val) #0 {
 ; SMALL64-LABEL: storesTGInit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C0(2)
 ; SMALL64-NEXT:    ld 4, L..C1(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
@@ -73,11 +73,11 @@ define void @storesTGInit(i64 %Val) #0 {
 ; LARGE64-LABEL: storesTGInit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C0@u(2)
 ; LARGE64-NEXT:    addis 4, L..C1@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C0@l(3)
 ; LARGE64-NEXT:    ld 4, L..C1@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -96,12 +96,12 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL32-LABEL: storesTIUninit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    mr 7, 3
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    lwz 4, L..C3(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
@@ -113,8 +113,8 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE32-LABEL: storesTIUninit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    mr 7, 3
 ; LARGE32-NEXT:    addis 3, L..C2@u(2)
@@ -132,11 +132,11 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; SMALL64-LABEL: storesTIUninit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    ld 4, L..C3(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
@@ -147,11 +147,11 @@ define void @storesTIUninit(i64 %Val) #0 {
 ; LARGE64-LABEL: storesTIUninit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C2@u(2)
 ; LARGE64-NEXT:    addis 4, L..C3@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    ld 4, L..C3@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -170,12 +170,12 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL32-LABEL: storesTIInit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    mr 7, 3
 ; SMALL32-NEXT:    lwz 3, L..C4(2)
 ; SMALL32-NEXT:    lwz 4, L..C5(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
@@ -187,8 +187,8 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE32-LABEL: storesTIInit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    mr 7, 3
 ; LARGE32-NEXT:    addis 3, L..C4@u(2)
@@ -206,11 +206,11 @@ define void @storesTIInit(i64 %Val) #0 {
 ; SMALL64-LABEL: storesTIInit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C4(2)
 ; SMALL64-NEXT:    ld 4, L..C5(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
@@ -221,11 +221,11 @@ define void @storesTIInit(i64 %Val) #0 {
 ; LARGE64-LABEL: storesTIInit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
 ; LARGE64-NEXT:    addis 4, L..C5@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C4@l(3)
 ; LARGE64-NEXT:    ld 4, L..C5@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -244,12 +244,12 @@ define void @storesTWInit(i64 %Val) #0 {
 ; SMALL32-LABEL: storesTWInit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    mr 6, 4
 ; SMALL32-NEXT:    mr 7, 3
 ; SMALL32-NEXT:    lwz 3, L..C6(2)
 ; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    stw 6, 4(3)
 ; SMALL32-NEXT:    stw 7, 0(3)
@@ -261,8 +261,8 @@ define void @storesTWInit(i64 %Val) #0 {
 ; LARGE32-LABEL: storesTWInit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    mr 6, 4
 ; LARGE32-NEXT:    mr 7, 3
 ; LARGE32-NEXT:    addis 3, L..C6@u(2)
@@ -280,11 +280,11 @@ define void @storesTWInit(i64 %Val) #0 {
 ; SMALL64-LABEL: storesTWInit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    mr 6, 3
 ; SMALL64-NEXT:    ld 3, L..C6(2)
 ; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    std 6, 0(3)
 ; SMALL64-NEXT:    addi 1, 1, 48
@@ -295,11 +295,11 @@ define void @storesTWInit(i64 %Val) #0 {
 ; LARGE64-LABEL: storesTWInit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    mr 6, 3
 ; LARGE64-NEXT:    addis 3, L..C6@u(2)
 ; LARGE64-NEXT:    addis 4, L..C7@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C6@l(3)
 ; LARGE64-NEXT:    ld 4, L..C7@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -318,10 +318,10 @@ define i64 @loadsTGInit() #1 {
 ; SMALL32-LABEL: loadsTGInit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    lwz 3, L..C0(2)
 ; SMALL32-NEXT:    lwz 4, L..C1(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    lwz 4, L..C8(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
@@ -338,8 +338,8 @@ define i64 @loadsTGInit() #1 {
 ; LARGE32-LABEL: loadsTGInit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C0@u(2)
 ; LARGE32-NEXT:    addis 4, L..C1@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C0@l(3)
@@ -361,10 +361,10 @@ define i64 @loadsTGInit() #1 {
 ; SMALL64-LABEL: loadsTGInit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    ld 3, L..C0(2)
 ; SMALL64-NEXT:    ld 4, L..C1(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    ld 4, L..C8(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
@@ -378,10 +378,10 @@ define i64 @loadsTGInit() #1 {
 ; LARGE64-LABEL: loadsTGInit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C0@u(2)
 ; LARGE64-NEXT:    addis 4, L..C1@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C0@l(3)
 ; LARGE64-NEXT:    ld 4, L..C1@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -406,10 +406,10 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL32-LABEL: loadsTIUninit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    lwz 3, L..C2(2)
 ; SMALL32-NEXT:    lwz 4, L..C3(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    lwz 4, L..C8(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
@@ -426,8 +426,8 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE32-LABEL: loadsTIUninit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C2@u(2)
 ; LARGE32-NEXT:    addis 4, L..C3@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C2@l(3)
@@ -449,10 +449,10 @@ define i64 @loadsTIUninit() #1 {
 ; SMALL64-LABEL: loadsTIUninit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    ld 3, L..C2(2)
 ; SMALL64-NEXT:    ld 4, L..C3(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    ld 4, L..C8(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
@@ -466,10 +466,10 @@ define i64 @loadsTIUninit() #1 {
 ; LARGE64-LABEL: loadsTIUninit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C2@u(2)
 ; LARGE64-NEXT:    addis 4, L..C3@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C2@l(3)
 ; LARGE64-NEXT:    ld 4, L..C3@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -494,10 +494,10 @@ define i64 @loadsTIInit() #1 {
 ; SMALL32-LABEL: loadsTIInit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    lwz 3, L..C4(2)
 ; SMALL32-NEXT:    lwz 4, L..C5(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    lwz 4, L..C8(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
@@ -514,8 +514,8 @@ define i64 @loadsTIInit() #1 {
 ; LARGE32-LABEL: loadsTIInit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C4@u(2)
 ; LARGE32-NEXT:    addis 4, L..C5@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C4@l(3)
@@ -537,10 +537,10 @@ define i64 @loadsTIInit() #1 {
 ; SMALL64-LABEL: loadsTIInit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    ld 3, L..C4(2)
 ; SMALL64-NEXT:    ld 4, L..C5(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    ld 4, L..C8(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
@@ -554,10 +554,10 @@ define i64 @loadsTIInit() #1 {
 ; LARGE64-LABEL: loadsTIInit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C4@u(2)
 ; LARGE64-NEXT:    addis 4, L..C5@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C4@l(3)
 ; LARGE64-NEXT:    ld 4, L..C5@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]
@@ -582,10 +582,10 @@ define i64 @loadsTWInit() #1 {
 ; SMALL32-LABEL: loadsTWInit:
 ; SMALL32:       # %bb.0: # %entry
 ; SMALL32-NEXT:    mflr 0
-; SMALL32-NEXT:    stw 0, 8(1)
 ; SMALL32-NEXT:    stwu 1, -32(1)
 ; SMALL32-NEXT:    lwz 3, L..C6(2)
 ; SMALL32-NEXT:    lwz 4, L..C7(2)
+; SMALL32-NEXT:    stw 0, 40(1)
 ; SMALL32-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL32-NEXT:    lwz 4, L..C8(2)
 ; SMALL32-NEXT:    lwz 5, 4(3)
@@ -602,8 +602,8 @@ define i64 @loadsTWInit() #1 {
 ; LARGE32-LABEL: loadsTWInit:
 ; LARGE32:       # %bb.0: # %entry
 ; LARGE32-NEXT:    mflr 0
-; LARGE32-NEXT:    stw 0, 8(1)
 ; LARGE32-NEXT:    stwu 1, -32(1)
+; LARGE32-NEXT:    stw 0, 40(1)
 ; LARGE32-NEXT:    addis 3, L..C6@u(2)
 ; LARGE32-NEXT:    addis 4, L..C7@u(2)
 ; LARGE32-NEXT:    lwz 3, L..C6@l(3)
@@ -625,10 +625,10 @@ define i64 @loadsTWInit() #1 {
 ; SMALL64-LABEL: loadsTWInit:
 ; SMALL64:       # %bb.0: # %entry
 ; SMALL64-NEXT:    mflr 0
-; SMALL64-NEXT:    std 0, 16(1)
 ; SMALL64-NEXT:    stdu 1, -48(1)
 ; SMALL64-NEXT:    ld 3, L..C6(2)
 ; SMALL64-NEXT:    ld 4, L..C7(2)
+; SMALL64-NEXT:    std 0, 64(1)
 ; SMALL64-NEXT:    bla .__tls_get_addr[PR]
 ; SMALL64-NEXT:    ld 4, L..C8(2)
 ; SMALL64-NEXT:    ld 3, 0(3)
@@ -642,10 +642,10 @@ define i64 @loadsTWInit() #1 {
 ; LARGE64-LABEL: loadsTWInit:
 ; LARGE64:       # %bb.0: # %entry
 ; LARGE64-NEXT:    mflr 0
-; LARGE64-NEXT:    std 0, 16(1)
 ; LARGE64-NEXT:    stdu 1, -48(1)
 ; LARGE64-NEXT:    addis 3, L..C6@u(2)
 ; LARGE64-NEXT:    addis 4, L..C7@u(2)
+; LARGE64-NEXT:    std 0, 64(1)
 ; LARGE64-NEXT:    ld 3, L..C6@l(3)
 ; LARGE64-NEXT:    ld 4, L..C7@l(4)
 ; LARGE64-NEXT:    bla .__tls_get_addr[PR]

diff  --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
index 423719ec04c8f..0f7ceb8003655 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll
@@ -524,8 +524,8 @@ entry:
 ; DIS:      Disassembly of section .text:
 ; DIS:      00000000 (idx: 5) .storesTIInit:
 ; DIS-NEXT:                                       mflr 0
-; DIS-NEXT:                                       stw 0, 8(1)
 ; DIS-NEXT:                                       stwu 1, -32(1)
+; DIS-NEXT:                                       stw 0, 40(1)
 ; DIS-NEXT:                                       mr 6, 4
 ; DIS-NEXT:                                       mr 7, 3
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
@@ -546,8 +546,8 @@ entry:
 ; DIS-NEXT:                                       blr
 ; DIS:      00000040 (idx: 7) .loadsTWInit:
 ; DIS-NEXT:                                       mflr 0
-; DIS-NEXT:                                       stw 0, 8(1)
 ; DIS-NEXT:                                       stwu 1, -32(1)
+; DIS-NEXT:                                       stw 0, 40(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 3, 2, 0
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOCU (idx: 21) .TWInit[TE]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}                addis 4, 2, 0

diff  --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
index 2550904e65fec..899ec4a4dd9c6 100644
--- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll
@@ -32,7 +32,7 @@ entry:
 ; RELOC-NEXT: Relocations [
 ; RELOC-NEXT:   Section (index: 1) .text {
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x12
+; RELOC-NEXT:     Virtual Address: 0xE
 ; RELOC-NEXT:     Symbol: .TIUninit (23)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -40,7 +40,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x16
+; RELOC-NEXT:     Virtual Address: 0x12
 ; RELOC-NEXT:     Symbol: TIUninit (25)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -56,7 +56,7 @@ entry:
 ; RELOC-NEXT:     Type: R_RBA (0x18)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x3E
+; RELOC-NEXT:     Virtual Address: 0x3A
 ; RELOC-NEXT:     Symbol: .TGInit (27)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -64,7 +64,7 @@ entry:
 ; RELOC-NEXT:     Type: R_TOC (0x3)
 ; RELOC-NEXT:   }
 ; RELOC-NEXT:   Relocation {
-; RELOC-NEXT:     Virtual Address: 0x42
+; RELOC-NEXT:     Virtual Address: 0x3E
 ; RELOC-NEXT:     Symbol: TGInit (29)
 ; RELOC-NEXT:     IsSigned: No
 ; RELOC-NEXT:     FixupBitValue: 0
@@ -565,13 +565,13 @@ entry:
 ; DIS:      Disassembly of section .text:
 ; DIS:      00000000 (idx: 5) .storesTIUninit:
 ; DIS-NEXT:                                      mflr 0
-; DIS-NEXT:                                      stw 0, 8(1)
 ; DIS-NEXT:                                      stwu 1, -32(1)
 ; DIS-NEXT:                                      mr 6, 3
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 0(2)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 23) .TIUninit[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 4(2)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 25) TIUninit[TC]
+; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_addr[PR]
 ; DIS-NEXT:                                      stw 6, 0(3)
@@ -581,12 +581,12 @@ entry:
 ; DIS-NEXT:                                      blr
 ; DIS:      00000030 (idx: 7) .loadsTGInit:
 ; DIS-NEXT:                                      mflr 0
-; DIS-NEXT:                                      stw 0, 8(1)
 ; DIS-NEXT:                                      stwu 1, -32(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 3, 8(2)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 27) .TGInit[TC]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 12(2)
 ; DIS-NEXT: {{0*}}[[#ADDR + 2]]: R_TOC (idx: 29) TGInit[TC]
+; DIS-NEXT:                                      stw 0, 40(1)
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               bla 0
 ; DIS-NEXT: {{0*}}[[#ADDR]]: R_RBA (idx: 1)      .__tls_get_addr[PR]
 ; DIS-NEXT: [[#%x, ADDR:]]: {{.*}}               lwz 4, 16(2)

diff  --git a/llvm/test/CodeGen/PowerPC/aix-user-defined-memcpy.ll b/llvm/test/CodeGen/PowerPC/aix-user-defined-memcpy.ll
index 8e8942654cd8f..26c1e7e0ace02 100644
--- a/llvm/test/CodeGen/PowerPC/aix-user-defined-memcpy.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-user-defined-memcpy.ll
@@ -106,8 +106,8 @@ declare void @llvm.memcpy.p0.p0.i32(ptr nocapture writeonly, ptr nocapture reado
 ; 32-DIS-NEXT:        c: 60 00 00 00                   nop
 ; 32-DIS:      00000010 <.call_memcpy>:
 ; 32-DIS-NEXT:       10: 7c 08 02 a6                   mflr 0
-; 32-DIS-NEXT:       14: 90 01 00 08                   stw 0, 8(1)
-; 32-DIS-NEXT:       18: 94 21 ff c0                   stwu 1, -64(1)
+; 32-DIS-NEXT:       14: 94 21 ff c0                   stwu 1, -64(1)
+; 32-DIS-NEXT:       18: 90 01 00 48                   stw 0, 72(1)
 ; 32-DIS-NEXT:       1c: 4b ff ff e5                   bl 0x0
 ; 32-DIS-NEXT:       20: 60 00 00 00                   nop
 ; 32-DIS-NEXT:       24: 38 21 00 40                   addi 1, 1, 64

diff  --git a/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills.ll b/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills.ll
index 649abaa04a95e..03602e06bdbff 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills.ll
@@ -14,16 +14,16 @@ define double @caller() {
 ; 32BIT-LABEL: caller:
 ; 32BIT:       # %bb.0: # %entry
 ; 32BIT-NEXT:    mflr 0
-; 32BIT-NEXT:    stw 0, 8(1)
 ; 32BIT-NEXT:    stwu 1, -192(1)
 ; 32BIT-NEXT:    lis 3, 16392
+; 32BIT-NEXT:    stw 0, 200(1)
 ; 32BIT-NEXT:    lis 4, 16384
 ; 32BIT-NEXT:    xxlxor 0, 0, 0
 ; 32BIT-NEXT:    lwz 5, L..C0(2) # %const.0
-; 32BIT-NEXT:    li 6, 512
 ; 32BIT-NEXT:    stw 3, 180(1)
 ; 32BIT-NEXT:    li 3, 0
 ; 32BIT-NEXT:    xxlxor 1, 1, 1
+; 32BIT-NEXT:    li 6, 512
 ; 32BIT-NEXT:    stw 4, 172(1)
 ; 32BIT-NEXT:    lis 4, 16368
 ; 32BIT-NEXT:    xxlxor 2, 2, 2
@@ -82,19 +82,19 @@ define double @caller() {
 ; 64BIT-LABEL: caller:
 ; 64BIT:       # %bb.0: # %entry
 ; 64BIT-NEXT:    mflr 0
-; 64BIT-NEXT:    std 0, 16(1)
 ; 64BIT-NEXT:    stdu 1, -224(1)
 ; 64BIT-NEXT:    li 3, 2049
+; 64BIT-NEXT:    std 0, 240(1)
 ; 64BIT-NEXT:    li 4, 1
 ; 64BIT-NEXT:    xxlxor 0, 0, 0
-; 64BIT-NEXT:    li 5, 0
 ; 64BIT-NEXT:    rldic 3, 3, 51, 1
 ; 64BIT-NEXT:    rldic 4, 4, 62, 1
+; 64BIT-NEXT:    li 5, 0
 ; 64BIT-NEXT:    xxlxor 1, 1, 1
-; 64BIT-NEXT:    li 6, 144
 ; 64BIT-NEXT:    std 3, 216(1)
 ; 64BIT-NEXT:    li 3, 1023
 ; 64BIT-NEXT:    xxlxor 2, 2, 2
+; 64BIT-NEXT:    li 6, 144
 ; 64BIT-NEXT:    rldic 3, 3, 52, 2
 ; 64BIT-NEXT:    std 4, 208(1)
 ; 64BIT-NEXT:    li 4, 160

diff  --git a/llvm/test/CodeGen/PowerPC/aix-vector-stack-caller.ll b/llvm/test/CodeGen/PowerPC/aix-vector-stack-caller.ll
index e8b8b2ffc2468..509efaac381a0 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vector-stack-caller.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vector-stack-caller.ll
@@ -22,11 +22,11 @@ define dso_local i32 @vec_caller() {
 ; 32BIT-LABEL: vec_caller:
 ; 32BIT:       # %bb.0: # %entry
 ; 32BIT-NEXT:    mflr 0
-; 32BIT-NEXT:    stw 0, 8(1)
 ; 32BIT-NEXT:    stwu 1, -64(1)
 ; 32BIT-NEXT:    lwz 3, L..C0(2)
 ; 32BIT-NEXT:    lwz 4, L..C1(2)
 ; 32BIT-NEXT:    xxlxor 34, 34, 34
+; 32BIT-NEXT:    stw 0, 72(1)
 ; 32BIT-NEXT:    xxlxor 35, 35, 35
 ; 32BIT-NEXT:    xxlxor 36, 36, 36
 ; 32BIT-NEXT:    lxvw4x 0, 0, 3
@@ -55,11 +55,11 @@ define dso_local i32 @vec_caller() {
 ; 64BIT-LABEL: vec_caller:
 ; 64BIT:       # %bb.0: # %entry
 ; 64BIT-NEXT:    mflr 0
-; 64BIT-NEXT:    std 0, 16(1)
 ; 64BIT-NEXT:    stdu 1, -112(1)
 ; 64BIT-NEXT:    ld 3, L..C0(2)
 ; 64BIT-NEXT:    ld 4, L..C1(2)
 ; 64BIT-NEXT:    xxlxor 34, 34, 34
+; 64BIT-NEXT:    std 0, 128(1)
 ; 64BIT-NEXT:    xxlxor 35, 35, 35
 ; 64BIT-NEXT:    xxlxor 36, 36, 36
 ; 64BIT-NEXT:    lxvw4x 0, 0, 3

diff  --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll
index 3e4cc02f124af..ad47ef7b0d083 100644
--- a/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-reloc.ll
@@ -459,9 +459,9 @@ declare i32 @bar(i32)
 ; DIS:      Disassembly of section .text:
 ; DIS:      00000000 <.foo>:
 ; DIS-NEXT:        0: 7c 08 02 a6                   mflr 0
-; DIS-NEXT:        4: 90 01 00 08                   stw 0, 8(1)
-; DIS-NEXT:        8: 94 21 ff c0                   stwu 1, -64(1)
-; DIS-NEXT:        c: 38 60 00 01                   li 3, 1
+; DIS-NEXT:        4: 94 21 ff c0                   stwu 1, -64(1)
+; DIS-NEXT:        8: 38 60 00 01                   li 3, 1
+; DIS-NEXT:        c: 90 01 00 48                   stw 0, 72(1)
 ; DIS-NEXT:       10: 4b ff ff f1                   bl 0x0
 ; DIS-NEXT:       14: 60 00 00 00                   nop
 ; DIS-NEXT:       18: 80 82 00 00                   lwz 4, 0(2)
@@ -511,9 +511,9 @@ declare i32 @bar(i32)
 ; DIS64:      Disassembly of section .text:
 ; DIS64:      0000000000000000 <.foo>:
 ; DIS64-NEXT:        0: 7c 08 02 a6  	mflr 0
-; DIS64-NEXT:        4: f8 01 00 10  	std 0, 16(1)
-; DIS64-NEXT:        8: f8 21 ff 91  	stdu 1, -112(1)
-; DIS64-NEXT:        c: 38 60 00 01  	li 3, 1
+; DIS64-NEXT:        4: f8 21 ff 91  	stdu 1, -112(1)
+; DIS64-NEXT:        8: 38 60 00 01  	li 3, 1
+; DIS64-NEXT:        c: f8 01 00 80  	std 0, 128(1)
 ; DIS64-NEXT:       10: 4b ff ff f1  	bl 0x0 <.foo>
 ; DIS64-NEXT:       14: 60 00 00 00  	nop
 ; DIS64-NEXT:       18: e8 82 00 00  	ld 4, 0(2)

diff  --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-symbol-rename.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-symbol-rename.ll
index f07521a7e0a81..21e2eff19fa30 100644
--- a/llvm/test/CodeGen/PowerPC/aix-xcoff-symbol-rename.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-symbol-rename.ll
@@ -107,8 +107,8 @@ declare i32 @"f\40o"(...)
 ; OBJ-EMPTY:
 ; OBJ-NEXT:  00000000 (idx: 7) .f$o:
 ; OBJ-NEXT:         0: 7c 08 02 a6   mflr 0
-; OBJ-NEXT:         4: 90 01 00 08   stw 0, 8(1)
-; OBJ-NEXT:         8: 94 21 ff c0   stwu 1, -64(1)
+; OBJ-NEXT:         4: 94 21 ff c0   stwu 1, -64(1)
+; OBJ-NEXT:         8: 90 01 00 48   stw 0, 72(1)
 ; OBJ-NEXT:         c: 4b ff ff f5   bl 0x0
 ; OBJ-NEXT:                          0000000c:  R_RBR        (idx: 1) .f@o[PR]
 ; OBJ-NEXT:        10: 60 00 00 00   nop
@@ -122,8 +122,8 @@ declare i32 @"f\40o"(...)
 ; OBJ-EMPTY:
 ; OBJ-NEXT:  00000030 (idx: 9) .f&o:
 ; OBJ-NEXT:        30: 7c 08 02 a6   mflr 0
-; OBJ-NEXT:        34: 90 01 00 08   stw 0, 8(1)
-; OBJ-NEXT:        38: 94 21 ff c0   stwu 1, -64(1)
+; OBJ-NEXT:        34: 94 21 ff c0   stwu 1, -64(1)
+; OBJ-NEXT:        38: 90 01 00 48   stw 0, 72(1)
 ; OBJ-NEXT:        3c: 4b ff ff c5   bl 0x0
 ; OBJ-NEXT:        40: 80 82 00 00   lwz 4, 0(2)
 ; OBJ-NEXT:                          00000042:  R_TOC        (idx: 25) f=o[TC]

diff  --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 3abca8e6d2287..0ab8ab5ee0007 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -509,19 +509,19 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
 ; AIX32-LABEL: test_op_ignore:
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    mflr 0
-; AIX32-NEXT:    stw 0, 8(1)
 ; AIX32-NEXT:    stwu 1, -160(1)
 ; AIX32-NEXT:    lwz 3, L..C0(2) # @sc
+; AIX32-NEXT:    stw 0, 168(1)
 ; AIX32-NEXT:    stw 15, 92(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 26, 136(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 28, 144(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    li 15, 1
 ; AIX32-NEXT:    stw 16, 96(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 17, 100(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    rlwinm 4, 3, 3, 27, 28
 ; AIX32-NEXT:    rlwinm 28, 3, 0, 0, 29
 ; AIX32-NEXT:    li 3, 255
 ; AIX32-NEXT:    xori 26, 4, 24
+; AIX32-NEXT:    stw 17, 100(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 18, 104(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 19, 108(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 20, 112(1) # 4-byte Folded Spill
@@ -1755,19 +1755,19 @@ define dso_local void @test_fetch_and_op() local_unnamed_addr #0 {
 ; AIX32-LABEL: test_fetch_and_op:
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    mflr 0
-; AIX32-NEXT:    stw 0, 8(1)
 ; AIX32-NEXT:    stwu 1, -144(1)
 ; AIX32-NEXT:    lwz 4, L..C0(2) # @sc
+; AIX32-NEXT:    stw 0, 152(1)
 ; AIX32-NEXT:    stw 26, 120(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    li 7, 11
 ; AIX32-NEXT:    stw 13, 68(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 14, 72(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 15, 76(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 16, 80(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    rlwinm 3, 4, 3, 27, 28
-; AIX32-NEXT:    stw 17, 84(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 16, 80(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    xori 26, 3, 24
 ; AIX32-NEXT:    li 3, 255
+; AIX32-NEXT:    stw 17, 84(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 18, 88(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 19, 92(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 20, 96(1) # 4-byte Folded Spill
@@ -1784,8 +1784,8 @@ define dso_local void @test_fetch_and_op() local_unnamed_addr #0 {
 ; AIX32-NEXT:    stw 4, 64(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    rlwinm 24, 4, 0, 0, 29
 ; AIX32-NEXT:    slw 23, 7, 26
-; AIX32-NEXT:    sync
 ; AIX32-NEXT:    slw 4, 3, 26
+; AIX32-NEXT:    sync
 ; AIX32-NEXT:  L..BB1_1: # %entry
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 3, 0, 24
@@ -3197,20 +3197,20 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
 ; AIX32-LABEL: test_op_and_fetch:
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    mflr 0
-; AIX32-NEXT:    stw 0, 8(1)
 ; AIX32-NEXT:    stwu 1, -176(1)
+; AIX32-NEXT:    stw 0, 184(1)
 ; AIX32-NEXT:    stw 24, 144(1) # 4-byte Folded Spill
+; AIX32-NEXT:    lwz 24, L..C0(2) # @sc
 ; AIX32-NEXT:    stw 26, 152(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    lwz 26, L..C1(2) # @uc
-; AIX32-NEXT:    lwz 24, L..C0(2) # @sc
 ; AIX32-NEXT:    stw 23, 140(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    li 5, 255
 ; AIX32-NEXT:    stw 13, 100(1) # 4-byte Folded Spill
+; AIX32-NEXT:    rlwinm 4, 24, 3, 27, 28
 ; AIX32-NEXT:    stw 14, 104(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    lbz 3, 0(26)
-; AIX32-NEXT:    rlwinm 4, 24, 3, 27, 28
-; AIX32-NEXT:    stw 15, 108(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    xori 23, 4, 24
+; AIX32-NEXT:    stw 15, 108(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 16, 112(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 17, 116(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 18, 120(1) # 4-byte Folded Spill
@@ -3226,9 +3226,9 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
 ; AIX32-NEXT:    stw 31, 172(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    rlwinm 22, 24, 0, 0, 29
 ; AIX32-NEXT:    li 25, -1
-; AIX32-NEXT:    sync
 ; AIX32-NEXT:    slw 4, 3, 23
 ; AIX32-NEXT:    slw 6, 5, 23
+; AIX32-NEXT:    sync
 ; AIX32-NEXT:  L..BB2_1: # %entry
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 5, 0, 22
@@ -4655,26 +4655,26 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-LABEL: test_compare_and_swap:
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    mflr 0
-; AIX32-NEXT:    stw 0, 8(1)
 ; AIX32-NEXT:    stwu 1, -128(1)
+; AIX32-NEXT:    stw 0, 136(1)
 ; AIX32-NEXT:    stw 28, 112(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 29, 116(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    lwz 28, L..C0(2) # @sc
+; AIX32-NEXT:    stw 29, 116(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    lwz 29, L..C1(2) # @uc
 ; AIX32-NEXT:    stw 23, 92(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 17, 68(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 18, 72(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 19, 76(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    lbz 3, 0(28)
 ; AIX32-NEXT:    rlwinm 4, 28, 3, 27, 28
+; AIX32-NEXT:    stw 19, 76(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    lbz 5, 0(29)
-; AIX32-NEXT:    rlwinm 19, 28, 0, 0, 29
 ; AIX32-NEXT:    xori 23, 4, 24
 ; AIX32-NEXT:    li 4, 255
 ; AIX32-NEXT:    slw 6, 3, 23
-; AIX32-NEXT:    slw 5, 5, 23
 ; AIX32-NEXT:    slw 3, 4, 23
+; AIX32-NEXT:    slw 5, 5, 23
 ; AIX32-NEXT:    stw 20, 80(1) # 4-byte Folded Spill
+; AIX32-NEXT:    and 4, 6, 3
 ; AIX32-NEXT:    stw 21, 84(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 22, 88(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 24, 96(1) # 4-byte Folded Spill
@@ -4683,7 +4683,7 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    stw 27, 108(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 30, 120(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 31, 124(1) # 4-byte Folded Spill
-; AIX32-NEXT:    and 4, 6, 3
+; AIX32-NEXT:    rlwinm 19, 28, 0, 0, 29
 ; AIX32-NEXT:    and 5, 5, 3
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:  L..BB3_1: # %entry
@@ -5282,22 +5282,22 @@ define dso_local void @test_lock() local_unnamed_addr #0 {
 ; AIX32-LABEL: test_lock:
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    mflr 0
-; AIX32-NEXT:    stw 0, 8(1)
 ; AIX32-NEXT:    stwu 1, -96(1)
+; AIX32-NEXT:    stw 0, 104(1)
 ; AIX32-NEXT:    stw 29, 84(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    lwz 29, L..C0(2) # @sc
 ; AIX32-NEXT:    li 3, 1
 ; AIX32-NEXT:    li 5, 255
 ; AIX32-NEXT:    stw 23, 60(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 24, 64(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 25, 68(1) # 4-byte Folded Spill
-; AIX32-NEXT:    stw 26, 72(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    rlwinm 4, 29, 3, 27, 28
-; AIX32-NEXT:    stw 27, 76(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 25, 68(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    xori 4, 4, 24
-; AIX32-NEXT:    stw 28, 80(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 26, 72(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    slw 8, 3, 4
 ; AIX32-NEXT:    slw 6, 5, 4
+; AIX32-NEXT:    stw 27, 76(1) # 4-byte Folded Spill
+; AIX32-NEXT:    stw 28, 80(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 30, 88(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 31, 92(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    rlwinm 7, 29, 0, 0, 29
@@ -5634,14 +5634,14 @@ define dso_local i64 @cmpswplp(ptr noundef %ptr, ptr nocapture noundef readnone
 ; AIX32-LABEL: cmpswplp:
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    mflr 0
-; AIX32-NEXT:    stw 0, 8(1)
 ; AIX32-NEXT:    stwu 1, -64(1)
 ; AIX32-NEXT:    addic 7, 6, 1
-; AIX32-NEXT:    stw 6, 60(1)
+; AIX32-NEXT:    stw 0, 72(1)
 ; AIX32-NEXT:    addi 4, 1, 56
 ; AIX32-NEXT:    addze 8, 5
-; AIX32-NEXT:    stw 5, 56(1)
+; AIX32-NEXT:    stw 6, 60(1)
 ; AIX32-NEXT:    mr 6, 7
+; AIX32-NEXT:    stw 5, 56(1)
 ; AIX32-NEXT:    mr 5, 8
 ; AIX32-NEXT:    li 7, 0
 ; AIX32-NEXT:    li 8, 0
@@ -5687,18 +5687,18 @@ define dso_local i64 @atommax8(ptr nocapture noundef %ptr, i64 noundef %val) loc
 ; AIX32-LABEL: atommax8:
 ; AIX32:       # %bb.0: # %entry
 ; AIX32-NEXT:    mflr 0
-; AIX32-NEXT:    stw 0, 8(1)
 ; AIX32-NEXT:    stwu 1, -80(1)
+; AIX32-NEXT:    stw 0, 88(1)
+; AIX32-NEXT:    stw 28, 64(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    lwz 9, 4(3)
 ; AIX32-NEXT:    lwz 10, 0(3)
-; AIX32-NEXT:    stw 28, 64(1) # 4-byte Folded Spill
+; AIX32-NEXT:    addi 28, 1, 56
 ; AIX32-NEXT:    stw 29, 68(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 30, 72(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    stw 31, 76(1) # 4-byte Folded Spill
 ; AIX32-NEXT:    mr 31, 5
 ; AIX32-NEXT:    mr 29, 3
 ; AIX32-NEXT:    mr 30, 4
-; AIX32-NEXT:    addi 28, 1, 56
 ; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB7_1: # %atomicrmw.start
 ; AIX32-NEXT:    #

diff  --git a/llvm/test/CodeGen/PowerPC/alloca-crspill.ll b/llvm/test/CodeGen/PowerPC/alloca-crspill.ll
index 5ad887afa1cfa..da6a206ff9401 100644
--- a/llvm/test/CodeGen/PowerPC/alloca-crspill.ll
+++ b/llvm/test/CodeGen/PowerPC/alloca-crspill.ll
@@ -85,11 +85,12 @@ declare signext i32 @do_something(ptr)
 ; ELFV2-NEXT:      $x12 = MFOCRF8 killed $cr2
 ; V1ANDAIX-NEXT:   $x12 = MFCR8 implicit killed $cr2
 ; CHECK64-DAG:     STD $x31, -8, $x1
-; CHECK64-DAG:     STD killed $x0, 16, $x1
 ; CHECK64-DAG:     STW8 killed $x12, 8, $x1
 
 ; ELFV2-NEXT:      $x1 = STDU $x1, -48, $x1
+; ELFV2-NEXT:      STD killed $x0, 64, $x1
 ; V1ANDAIX-NEXT:   x1 = STDU $x1, -128, $x1
+; V1ANDAIX-NEXT:   STD killed $x0, 144, $x1
 
 ; CHECK64:         $x31 = OR8 $x1, $x1
 
@@ -116,9 +117,9 @@ declare signext i32 @do_something(ptr)
 ; CHECK32:       $r0 = MFLR implicit $lr
 ; CHECK32-NEXT:  $r12 = MFCR implicit killed $cr2
 ; CHECK32-DAG:   STW $r31, -4, $r1
-; CHECK32-DAG:   STW killed $r0, 8, $r1
 ; CHECK32-DAG:   STW killed $r12, 4, $r1
 ; CHECK32:       $r1 = STWU $r1, -80, $r1
+; CHECK32-NEXT:  STW killed $r0, 88, $r1
 
 ; CHECK32:       $r31 = OR $r1, $r1
 ; CHECK32:       $[[ORIGSP:r[0-9]+]] = ADDI $r31, 80

diff  --git a/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll b/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
index 16fe8d9ca1b43..62b54142f55e6 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
@@ -29,8 +29,8 @@ define dso_local i128 @lq_unordered(ptr %src) {
 ; PWR7-LABEL: lq_unordered:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    li r4, 0
@@ -50,9 +50,9 @@ define dso_local i128 @lq_unordered(ptr %src) {
 ; AIX64-PWR8-LABEL: lq_unordered:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    li r4, 0
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    bl .__atomic_load_16[PR]
 ; AIX64-PWR8-NEXT:    nop
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
@@ -63,8 +63,8 @@ define dso_local i128 @lq_unordered(ptr %src) {
 ; PPC-PWR8-LABEL: lq_unordered:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -32(r1)
+; PPC-PWR8-NEXT:    stw r0, 36(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    mr r4, r3
@@ -98,8 +98,8 @@ define dso_local i128 @lqx_unordered(ptr %src, i64 %idx) {
 ; PWR7-LABEL: lqx_unordered:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    sldi r4, r4, 4
@@ -125,9 +125,9 @@ define dso_local i128 @lqx_unordered(ptr %src, i64 %idx) {
 ; AIX64-PWR8-LABEL: lqx_unordered:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    sldi r4, r4, 4
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    add r3, r3, r4
 ; AIX64-PWR8-NEXT:    li r4, 0
 ; AIX64-PWR8-NEXT:    bl .__atomic_load_16[PR]
@@ -140,8 +140,8 @@ define dso_local i128 @lqx_unordered(ptr %src, i64 %idx) {
 ; PPC-PWR8-LABEL: lqx_unordered:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -32(r1)
+; PPC-PWR8-NEXT:    stw r0, 36(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    slwi r4, r6, 4
@@ -177,8 +177,8 @@ define dso_local i128 @lq_big_offset_unordered(ptr %src) {
 ; PWR7-LABEL: lq_big_offset_unordered:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    addis r3, r3, 32
@@ -203,10 +203,10 @@ define dso_local i128 @lq_big_offset_unordered(ptr %src) {
 ; AIX64-PWR8-LABEL: lq_big_offset_unordered:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    addis r3, r3, 32
 ; AIX64-PWR8-NEXT:    li r4, 0
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    bl .__atomic_load_16[PR]
 ; AIX64-PWR8-NEXT:    nop
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
@@ -217,8 +217,8 @@ define dso_local i128 @lq_big_offset_unordered(ptr %src) {
 ; PPC-PWR8-LABEL: lq_big_offset_unordered:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -32(r1)
+; PPC-PWR8-NEXT:    stw r0, 36(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    addis r4, r3, 32
@@ -251,8 +251,8 @@ define dso_local i128 @lq_monotonic(ptr %src) {
 ; PWR7-LABEL: lq_monotonic:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    li r4, 0
@@ -272,9 +272,9 @@ define dso_local i128 @lq_monotonic(ptr %src) {
 ; AIX64-PWR8-LABEL: lq_monotonic:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    li r4, 0
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    bl .__atomic_load_16[PR]
 ; AIX64-PWR8-NEXT:    nop
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
@@ -285,8 +285,8 @@ define dso_local i128 @lq_monotonic(ptr %src) {
 ; PPC-PWR8-LABEL: lq_monotonic:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -32(r1)
+; PPC-PWR8-NEXT:    stw r0, 36(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    mr r4, r3
@@ -321,8 +321,8 @@ define dso_local i128 @lq_acquire(ptr %src) {
 ; PWR7-LABEL: lq_acquire:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    li r4, 2
@@ -345,9 +345,9 @@ define dso_local i128 @lq_acquire(ptr %src) {
 ; AIX64-PWR8-LABEL: lq_acquire:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    li r4, 2
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    bl .__atomic_load_16[PR]
 ; AIX64-PWR8-NEXT:    nop
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
@@ -358,8 +358,8 @@ define dso_local i128 @lq_acquire(ptr %src) {
 ; PPC-PWR8-LABEL: lq_acquire:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -32(r1)
+; PPC-PWR8-NEXT:    stw r0, 36(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    mr r4, r3
@@ -395,8 +395,8 @@ define dso_local i128 @lq_seqcst(ptr %src) {
 ; PWR7-LABEL: lq_seqcst:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    li r4, 5
@@ -420,9 +420,9 @@ define dso_local i128 @lq_seqcst(ptr %src) {
 ; AIX64-PWR8-LABEL: lq_seqcst:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    li r4, 5
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    bl .__atomic_load_16[PR]
 ; AIX64-PWR8-NEXT:    nop
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
@@ -433,8 +433,8 @@ define dso_local i128 @lq_seqcst(ptr %src) {
 ; PPC-PWR8-LABEL: lq_seqcst:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -32(r1)
+; PPC-PWR8-NEXT:    stw r0, 36(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    mr r4, r3
@@ -466,8 +466,8 @@ define dso_local void @stq_unordered(i128 %val, ptr %dst) {
 ; PWR7-LABEL: stq_unordered:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    mr r6, r4
@@ -492,11 +492,11 @@ define dso_local void @stq_unordered(i128 %val, ptr %dst) {
 ; AIX64-PWR8-LABEL: stq_unordered:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    mr r6, r4
 ; AIX64-PWR8-NEXT:    mr r4, r3
 ; AIX64-PWR8-NEXT:    mr r3, r5
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    mr r5, r6
 ; AIX64-PWR8-NEXT:    li r6, 0
 ; AIX64-PWR8-NEXT:    bl .__atomic_store_16[PR]
@@ -509,8 +509,8 @@ define dso_local void @stq_unordered(i128 %val, ptr %dst) {
 ; PPC-PWR8-LABEL: stq_unordered:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -32(r1)
+; PPC-PWR8-NEXT:    stw r0, 36(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    stw r6, 28(r1)
@@ -544,8 +544,8 @@ define dso_local void @stqx_unordered(i128 %val, ptr %dst, i64 %idx) {
 ; PWR7-LABEL: stqx_unordered:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    mr r7, r4
@@ -573,10 +573,10 @@ define dso_local void @stqx_unordered(i128 %val, ptr %dst, i64 %idx) {
 ; AIX64-PWR8-LABEL: stqx_unordered:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    mr r7, r4
 ; AIX64-PWR8-NEXT:    mr r4, r3
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    sldi r3, r6, 4
 ; AIX64-PWR8-NEXT:    li r6, 0
 ; AIX64-PWR8-NEXT:    add r3, r5, r3
@@ -591,8 +591,8 @@ define dso_local void @stqx_unordered(i128 %val, ptr %dst, i64 %idx) {
 ; PPC-PWR8-LABEL: stqx_unordered:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -32(r1)
+; PPC-PWR8-NEXT:    stw r0, 36(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    slwi r8, r10, 4
@@ -629,8 +629,8 @@ define dso_local void @stq_big_offset_unordered(i128 %val, ptr %dst) {
 ; PWR7-LABEL: stq_big_offset_unordered:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    mr r6, r4
@@ -657,11 +657,11 @@ define dso_local void @stq_big_offset_unordered(i128 %val, ptr %dst) {
 ; AIX64-PWR8-LABEL: stq_big_offset_unordered:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    mr r6, r4
 ; AIX64-PWR8-NEXT:    mr r4, r3
 ; AIX64-PWR8-NEXT:    addis r3, r5, 32
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    mr r5, r6
 ; AIX64-PWR8-NEXT:    li r6, 0
 ; AIX64-PWR8-NEXT:    bl .__atomic_store_16[PR]
@@ -674,8 +674,8 @@ define dso_local void @stq_big_offset_unordered(i128 %val, ptr %dst) {
 ; PPC-PWR8-LABEL: stq_big_offset_unordered:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -32(r1)
+; PPC-PWR8-NEXT:    stw r0, 36(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    stw r6, 28(r1)
@@ -709,8 +709,8 @@ define dso_local void @stq_monotonic(i128 %val, ptr %dst) {
 ; PWR7-LABEL: stq_monotonic:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    mr r6, r4
@@ -735,11 +735,11 @@ define dso_local void @stq_monotonic(i128 %val, ptr %dst) {
 ; AIX64-PWR8-LABEL: stq_monotonic:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    mr r6, r4
 ; AIX64-PWR8-NEXT:    mr r4, r3
 ; AIX64-PWR8-NEXT:    mr r3, r5
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    mr r5, r6
 ; AIX64-PWR8-NEXT:    li r6, 0
 ; AIX64-PWR8-NEXT:    bl .__atomic_store_16[PR]
@@ -752,8 +752,8 @@ define dso_local void @stq_monotonic(i128 %val, ptr %dst) {
 ; PPC-PWR8-LABEL: stq_monotonic:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -32(r1)
+; PPC-PWR8-NEXT:    stw r0, 36(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    stw r6, 28(r1)
@@ -786,8 +786,8 @@ define dso_local void @stq_release(i128 %val, ptr %dst) {
 ; PWR7-LABEL: stq_release:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    mr r6, r4
@@ -813,11 +813,11 @@ define dso_local void @stq_release(i128 %val, ptr %dst) {
 ; AIX64-PWR8-LABEL: stq_release:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    mr r6, r4
 ; AIX64-PWR8-NEXT:    mr r4, r3
 ; AIX64-PWR8-NEXT:    mr r3, r5
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    mr r5, r6
 ; AIX64-PWR8-NEXT:    li r6, 3
 ; AIX64-PWR8-NEXT:    bl .__atomic_store_16[PR]
@@ -830,8 +830,8 @@ define dso_local void @stq_release(i128 %val, ptr %dst) {
 ; PPC-PWR8-LABEL: stq_release:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -32(r1)
+; PPC-PWR8-NEXT:    stw r0, 36(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    stw r6, 28(r1)
@@ -864,8 +864,8 @@ define dso_local void @stq_seqcst(i128 %val, ptr %dst) {
 ; PWR7-LABEL: stq_seqcst:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    mr r6, r4
@@ -891,11 +891,11 @@ define dso_local void @stq_seqcst(i128 %val, ptr %dst) {
 ; AIX64-PWR8-LABEL: stq_seqcst:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    mr r6, r4
 ; AIX64-PWR8-NEXT:    mr r4, r3
 ; AIX64-PWR8-NEXT:    mr r3, r5
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    mr r5, r6
 ; AIX64-PWR8-NEXT:    li r6, 5
 ; AIX64-PWR8-NEXT:    bl .__atomic_store_16[PR]
@@ -908,8 +908,8 @@ define dso_local void @stq_seqcst(i128 %val, ptr %dst) {
 ; PPC-PWR8-LABEL: stq_seqcst:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -32(r1)
+; PPC-PWR8-NEXT:    stw r0, 36(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    stw r6, 28(r1)

diff  --git a/llvm/test/CodeGen/PowerPC/atomics-i128.ll b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
index 113fb1636da0f..ee8ddb7ad70cf 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-i128.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
@@ -43,8 +43,8 @@ define i128 @swap(ptr %a, i128 %x) {
 ; PWR7-LABEL: swap:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    li r6, 5
@@ -74,9 +74,9 @@ define i128 @swap(ptr %a, i128 %x) {
 ; AIX64-PWR8-LABEL: swap:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    li r6, 5
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    bl .__atomic_exchange_16[PR]
 ; AIX64-PWR8-NEXT:    nop
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
@@ -87,8 +87,8 @@ define i128 @swap(ptr %a, i128 %x) {
 ; PPC-PWR8-LABEL: swap:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -48(r1)
+; PPC-PWR8-NEXT:    stw r0, 52(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    mr r4, r3
@@ -134,8 +134,8 @@ define i128 @add(ptr %a, i128 %x) {
 ; PWR7-LABEL: add:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    li r6, 5
@@ -165,9 +165,9 @@ define i128 @add(ptr %a, i128 %x) {
 ; AIX64-PWR8-LABEL: add:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    li r6, 5
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    bl .__atomic_fetch_add_16[PR]
 ; AIX64-PWR8-NEXT:    nop
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
@@ -178,8 +178,8 @@ define i128 @add(ptr %a, i128 %x) {
 ; PPC-PWR8-LABEL: add:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -80(r1)
+; PPC-PWR8-NEXT:    stw r0, 84(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 80
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    .cfi_offset r24, -32
@@ -272,8 +272,8 @@ define i128 @sub(ptr %a, i128 %x) {
 ; PWR7-LABEL: sub:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    li r6, 5
@@ -303,9 +303,9 @@ define i128 @sub(ptr %a, i128 %x) {
 ; AIX64-PWR8-LABEL: sub:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    li r6, 5
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    bl .__atomic_fetch_sub_16[PR]
 ; AIX64-PWR8-NEXT:    nop
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
@@ -316,8 +316,8 @@ define i128 @sub(ptr %a, i128 %x) {
 ; PPC-PWR8-LABEL: sub:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -80(r1)
+; PPC-PWR8-NEXT:    stw r0, 84(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 80
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    .cfi_offset r24, -32
@@ -410,8 +410,8 @@ define i128 @and(ptr %a, i128 %x) {
 ; PWR7-LABEL: and:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    li r6, 5
@@ -441,9 +441,9 @@ define i128 @and(ptr %a, i128 %x) {
 ; AIX64-PWR8-LABEL: and:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    li r6, 5
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    bl .__atomic_fetch_and_16[PR]
 ; AIX64-PWR8-NEXT:    nop
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
@@ -454,8 +454,8 @@ define i128 @and(ptr %a, i128 %x) {
 ; PPC-PWR8-LABEL: and:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -80(r1)
+; PPC-PWR8-NEXT:    stw r0, 84(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 80
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    .cfi_offset r24, -32
@@ -548,8 +548,8 @@ define i128 @or(ptr %a, i128 %x) {
 ; PWR7-LABEL: or:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    li r6, 5
@@ -579,9 +579,9 @@ define i128 @or(ptr %a, i128 %x) {
 ; AIX64-PWR8-LABEL: or:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    li r6, 5
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    bl .__atomic_fetch_or_16[PR]
 ; AIX64-PWR8-NEXT:    nop
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
@@ -592,8 +592,8 @@ define i128 @or(ptr %a, i128 %x) {
 ; PPC-PWR8-LABEL: or:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -80(r1)
+; PPC-PWR8-NEXT:    stw r0, 84(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 80
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    .cfi_offset r24, -32
@@ -686,8 +686,8 @@ define i128 @xor(ptr %a, i128 %x) {
 ; PWR7-LABEL: xor:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    li r6, 5
@@ -717,9 +717,9 @@ define i128 @xor(ptr %a, i128 %x) {
 ; AIX64-PWR8-LABEL: xor:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    li r6, 5
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    bl .__atomic_fetch_xor_16[PR]
 ; AIX64-PWR8-NEXT:    nop
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
@@ -730,8 +730,8 @@ define i128 @xor(ptr %a, i128 %x) {
 ; PPC-PWR8-LABEL: xor:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -80(r1)
+; PPC-PWR8-NEXT:    stw r0, 84(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 80
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    .cfi_offset r24, -32
@@ -824,8 +824,8 @@ define i128 @nand(ptr %a, i128 %x) {
 ; PWR7-LABEL: nand:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -112(r1)
+; PWR7-NEXT:    std r0, 128(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    li r6, 5
@@ -855,9 +855,9 @@ define i128 @nand(ptr %a, i128 %x) {
 ; AIX64-PWR8-LABEL: nand:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
 ; AIX64-PWR8-NEXT:    li r6, 5
+; AIX64-PWR8-NEXT:    std r0, 128(r1)
 ; AIX64-PWR8-NEXT:    bl .__atomic_fetch_nand_16[PR]
 ; AIX64-PWR8-NEXT:    nop
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
@@ -868,8 +868,8 @@ define i128 @nand(ptr %a, i128 %x) {
 ; PPC-PWR8-LABEL: nand:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -80(r1)
+; PPC-PWR8-NEXT:    stw r0, 84(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 80
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    .cfi_offset r24, -32
@@ -971,8 +971,8 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
 ; PWR7-LABEL: cas_weak_acquire_acquire:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -128(r1)
+; PWR7-NEXT:    std r0, 144(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 128
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    std r5, 120(r1)
@@ -1018,8 +1018,8 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
 ; AIX64-PWR8-LABEL: cas_weak_acquire_acquire:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -128(r1)
+; AIX64-PWR8-NEXT:    std r0, 144(r1)
 ; AIX64-PWR8-NEXT:    std r5, 120(r1)
 ; AIX64-PWR8-NEXT:    std r4, 112(r1)
 ; AIX64-PWR8-NEXT:    addi r4, r1, 112
@@ -1039,8 +1039,8 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
 ; PPC-PWR8-LABEL: cas_weak_acquire_acquire:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -48(r1)
+; PPC-PWR8-NEXT:    stw r0, 52(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    mr r4, r3
@@ -1102,8 +1102,8 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
 ; PWR7-LABEL: cas_weak_release_monotonic:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -128(r1)
+; PWR7-NEXT:    std r0, 144(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 128
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    std r5, 120(r1)
@@ -1149,8 +1149,8 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
 ; AIX64-PWR8-LABEL: cas_weak_release_monotonic:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -128(r1)
+; AIX64-PWR8-NEXT:    std r0, 144(r1)
 ; AIX64-PWR8-NEXT:    std r5, 120(r1)
 ; AIX64-PWR8-NEXT:    std r4, 112(r1)
 ; AIX64-PWR8-NEXT:    addi r4, r1, 112
@@ -1170,8 +1170,8 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
 ; PPC-PWR8-LABEL: cas_weak_release_monotonic:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -48(r1)
+; PPC-PWR8-NEXT:    stw r0, 52(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    mr r4, r3
@@ -1234,8 +1234,8 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
 ; PWR7-LABEL: cas_sc_sc:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -128(r1)
+; PWR7-NEXT:    std r0, 144(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 128
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    std r5, 120(r1)
@@ -1282,8 +1282,8 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
 ; AIX64-PWR8-LABEL: cas_sc_sc:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -128(r1)
+; AIX64-PWR8-NEXT:    std r0, 144(r1)
 ; AIX64-PWR8-NEXT:    std r5, 120(r1)
 ; AIX64-PWR8-NEXT:    std r4, 112(r1)
 ; AIX64-PWR8-NEXT:    addi r4, r1, 112
@@ -1303,8 +1303,8 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
 ; PPC-PWR8-LABEL: cas_sc_sc:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -48(r1)
+; PPC-PWR8-NEXT:    stw r0, 52(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    mr r4, r3
@@ -1367,8 +1367,8 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
 ; PWR7-LABEL: cas_acqrel_acquire:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -128(r1)
+; PWR7-NEXT:    std r0, 144(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 128
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    std r5, 120(r1)
@@ -1415,8 +1415,8 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
 ; AIX64-PWR8-LABEL: cas_acqrel_acquire:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -128(r1)
+; AIX64-PWR8-NEXT:    std r0, 144(r1)
 ; AIX64-PWR8-NEXT:    std r5, 120(r1)
 ; AIX64-PWR8-NEXT:    std r4, 112(r1)
 ; AIX64-PWR8-NEXT:    addi r4, r1, 112
@@ -1436,8 +1436,8 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
 ; PPC-PWR8-LABEL: cas_acqrel_acquire:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -48(r1)
+; PPC-PWR8-NEXT:    stw r0, 52(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    mr r4, r3
@@ -1503,8 +1503,8 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
 ; PWR7-LABEL: cas_acqrel_acquire_check_succ:
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
-; PWR7-NEXT:    std r0, 16(r1)
 ; PWR7-NEXT:    stdu r1, -128(r1)
+; PWR7-NEXT:    std r0, 144(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 128
 ; PWR7-NEXT:    .cfi_offset lr, 16
 ; PWR7-NEXT:    std r5, 120(r1)
@@ -1552,8 +1552,8 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
 ; AIX64-PWR8-LABEL: cas_acqrel_acquire_check_succ:
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
-; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -128(r1)
+; AIX64-PWR8-NEXT:    std r0, 144(r1)
 ; AIX64-PWR8-NEXT:    std r5, 120(r1)
 ; AIX64-PWR8-NEXT:    std r4, 112(r1)
 ; AIX64-PWR8-NEXT:    addi r4, r1, 112
@@ -1571,8 +1571,8 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
 ; PPC-PWR8-LABEL: cas_acqrel_acquire_check_succ:
 ; PPC-PWR8:       # %bb.0: # %entry
 ; PPC-PWR8-NEXT:    mflr r0
-; PPC-PWR8-NEXT:    stw r0, 4(r1)
 ; PPC-PWR8-NEXT:    stwu r1, -48(r1)
+; PPC-PWR8-NEXT:    stw r0, 52(r1)
 ; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 48
 ; PPC-PWR8-NEXT:    .cfi_offset lr, 4
 ; PPC-PWR8-NEXT:    mr r4, r3

diff  --git a/llvm/test/CodeGen/PowerPC/atomics-indexed.ll b/llvm/test/CodeGen/PowerPC/atomics-indexed.ll
index 81201e62d8856..3ca804d820683 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-indexed.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-indexed.ll
@@ -69,8 +69,8 @@ define i64 @load_x_i64_unordered(ptr %mem) {
 ; PPC32-LABEL: load_x_i64_unordered:
 ; PPC32:       # %bb.0:
 ; PPC32-NEXT:    mflr r0
-; PPC32-NEXT:    stw r0, 4(r1)
 ; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
 ; PPC32-NEXT:    .cfi_def_cfa_offset 16
 ; PPC32-NEXT:    .cfi_offset lr, 4
 ; PPC32-NEXT:    addi r3, r3, -896
@@ -136,8 +136,8 @@ define void @store_x_i64_unordered(ptr %mem) {
 ; PPC32-LABEL: store_x_i64_unordered:
 ; PPC32:       # %bb.0:
 ; PPC32-NEXT:    mflr r0
-; PPC32-NEXT:    stw r0, 4(r1)
 ; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
 ; PPC32-NEXT:    .cfi_def_cfa_offset 16
 ; PPC32-NEXT:    .cfi_offset lr, 4
 ; PPC32-NEXT:    addi r3, r3, -896

diff  --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll
index d0b9471f7d6cf..57992cff28c62 100644
--- a/llvm/test/CodeGen/PowerPC/atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics.ll
@@ -50,8 +50,8 @@ define i64 @load_i64_seq_cst(ptr %mem) {
 ; PPC32-LABEL: load_i64_seq_cst:
 ; PPC32:       # %bb.0:
 ; PPC32-NEXT:    mflr r0
-; PPC32-NEXT:    stw r0, 4(r1)
 ; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
 ; PPC32-NEXT:    .cfi_def_cfa_offset 16
 ; PPC32-NEXT:    .cfi_offset lr, 4
 ; PPC32-NEXT:    li r4, 5
@@ -110,8 +110,8 @@ define void @store_i64_seq_cst(ptr %mem) {
 ; PPC32-LABEL: store_i64_seq_cst:
 ; PPC32:       # %bb.0:
 ; PPC32-NEXT:    mflr r0
-; PPC32-NEXT:    stw r0, 4(r1)
 ; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
 ; PPC32-NEXT:    .cfi_def_cfa_offset 16
 ; PPC32-NEXT:    .cfi_offset lr, 4
 ; PPC32-NEXT:    li r5, 0
@@ -280,8 +280,8 @@ define i64 @cas_weak_i64_release_monotonic(ptr %mem) {
 ; PPC32-LABEL: cas_weak_i64_release_monotonic:
 ; PPC32:       # %bb.0:
 ; PPC32-NEXT:    mflr r0
-; PPC32-NEXT:    stw r0, 4(r1)
 ; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
 ; PPC32-NEXT:    .cfi_def_cfa_offset 16
 ; PPC32-NEXT:    .cfi_offset lr, 4
 ; PPC32-NEXT:    li r4, 0
@@ -435,8 +435,8 @@ define i64 @and_i64_release(ptr %mem, i64 %operand) {
 ; PPC32-LABEL: and_i64_release:
 ; PPC32:       # %bb.0:
 ; PPC32-NEXT:    mflr r0
-; PPC32-NEXT:    stw r0, 4(r1)
 ; PPC32-NEXT:    stwu r1, -16(r1)
+; PPC32-NEXT:    stw r0, 20(r1)
 ; PPC32-NEXT:    .cfi_def_cfa_offset 16
 ; PPC32-NEXT:    .cfi_offset lr, 4
 ; PPC32-NEXT:    li r7, 3

diff  --git a/llvm/test/CodeGen/PowerPC/branch-on-store-cond.ll b/llvm/test/CodeGen/PowerPC/branch-on-store-cond.ll
index 9f1687201484c..2d9a9ab477880 100644
--- a/llvm/test/CodeGen/PowerPC/branch-on-store-cond.ll
+++ b/llvm/test/CodeGen/PowerPC/branch-on-store-cond.ll
@@ -8,8 +8,8 @@ define dso_local zeroext i8 @test1(ptr noundef %addr, i8 noundef zeroext %newval
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    stbcx. 4, 0, 3
@@ -27,8 +27,8 @@ define dso_local zeroext i8 @test1(ptr noundef %addr, i8 noundef zeroext %newval
 ; CHECK-AIX-LABEL: test1:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    std 0, 16(1)
 ; CHECK-AIX-NEXT:    stdu 1, -112(1)
+; CHECK-AIX-NEXT:    std 0, 128(1)
 ; CHECK-AIX-NEXT:    stbcx. 4, 0, 3
 ; CHECK-AIX-NEXT:    bne 0, L..BB0_2
 ; CHECK-AIX-NEXT:  # %bb.1: # %if.then
@@ -58,8 +58,8 @@ define dso_local signext i16 @test2(ptr noundef %addr, i16 noundef signext %newv
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    sthcx. 4, 0, 3
@@ -77,8 +77,8 @@ define dso_local signext i16 @test2(ptr noundef %addr, i16 noundef signext %newv
 ; CHECK-AIX-LABEL: test2:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    std 0, 16(1)
 ; CHECK-AIX-NEXT:    stdu 1, -112(1)
+; CHECK-AIX-NEXT:    std 0, 128(1)
 ; CHECK-AIX-NEXT:    sthcx. 4, 0, 3
 ; CHECK-AIX-NEXT:    bne 0, L..BB1_2
 ; CHECK-AIX-NEXT:  # %bb.1: # %if.then
@@ -108,8 +108,8 @@ define dso_local signext i32 @test3(ptr noundef %addr, i32 noundef signext %newv
 ; CHECK-LABEL: test3:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    stwcx. 4, 0, 3
@@ -127,8 +127,8 @@ define dso_local signext i32 @test3(ptr noundef %addr, i32 noundef signext %newv
 ; CHECK-AIX-LABEL: test3:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    std 0, 16(1)
 ; CHECK-AIX-NEXT:    stdu 1, -112(1)
+; CHECK-AIX-NEXT:    std 0, 128(1)
 ; CHECK-AIX-NEXT:    stwcx. 4, 0, 3
 ; CHECK-AIX-NEXT:    bne 0, L..BB2_2
 ; CHECK-AIX-NEXT:  # %bb.1: # %if.then
@@ -157,8 +157,8 @@ define dso_local i64 @test4(ptr noundef %addr, i64 noundef %newval) local_unname
 ; CHECK-LABEL: test4:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    stdcx. 4, 0, 3
@@ -176,8 +176,8 @@ define dso_local i64 @test4(ptr noundef %addr, i64 noundef %newval) local_unname
 ; CHECK-AIX-LABEL: test4:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    std 0, 16(1)
 ; CHECK-AIX-NEXT:    stdu 1, -112(1)
+; CHECK-AIX-NEXT:    std 0, 128(1)
 ; CHECK-AIX-NEXT:    stdcx. 4, 0, 3
 ; CHECK-AIX-NEXT:    bne 0, L..BB3_2
 ; CHECK-AIX-NEXT:  # %bb.1: # %if.then

diff  --git a/llvm/test/CodeGen/PowerPC/byval.ll b/llvm/test/CodeGen/PowerPC/byval.ll
index c76c316e3c90a..8688f5a8993ef 100644
--- a/llvm/test/CodeGen/PowerPC/byval.ll
+++ b/llvm/test/CodeGen/PowerPC/byval.ll
@@ -15,8 +15,8 @@ define dso_local i32 @bar() {
 ; CHECK-LABEL: bar:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -80(1)
+; CHECK-NEXT:    std 0, 96(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addi 3, 1, 40

diff  --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
index 48d759e5c9593..7ea300c6de965 100644
--- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
+++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll
@@ -559,8 +559,8 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; CHECK-P8:       # %bb.0: # %test_entry
 ; CHECK-P8-NEXT:    mflr r0
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    bl dummy
 ; CHECK-P8-NEXT:    nop
@@ -574,8 +574,8 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; CHECK-P9:       # %bb.0: # %test_entry
 ; CHECK-P9-NEXT:    mflr r0
 ; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r0, 16(r1)
 ; CHECK-P9-NEXT:    stdu r1, -48(r1)
+; CHECK-P9-NEXT:    std r0, 64(r1)
 ; CHECK-P9-NEXT:    mr r30, r3
 ; CHECK-P9-NEXT:    bl dummy
 ; CHECK-P9-NEXT:    nop
@@ -587,8 +587,8 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; CHECK-P9-BE-LABEL: no_crash_elt0_from_RHS:
 ; CHECK-P9-BE:       # %bb.0: # %test_entry
 ; CHECK-P9-BE-NEXT:    mflr r0
-; CHECK-P9-BE-NEXT:    std r0, 16(r1)
 ; CHECK-P9-BE-NEXT:    stdu r1, -128(r1)
+; CHECK-P9-BE-NEXT:    std r0, 144(r1)
 ; CHECK-P9-BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
 ; CHECK-P9-BE-NEXT:    mr r30, r3
 ; CHECK-P9-BE-NEXT:    bl dummy
@@ -602,8 +602,8 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; CHECK-NOVSX:       # %bb.0: # %test_entry
 ; CHECK-NOVSX-NEXT:    mflr r0
 ; CHECK-NOVSX-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NOVSX-NEXT:    std r0, 16(r1)
 ; CHECK-NOVSX-NEXT:    stdu r1, -48(r1)
+; CHECK-NOVSX-NEXT:    std r0, 64(r1)
 ; CHECK-NOVSX-NEXT:    mr r30, r3
 ; CHECK-NOVSX-NEXT:    bl dummy
 ; CHECK-NOVSX-NEXT:    nop
@@ -615,8 +615,8 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; CHECK-P7:       # %bb.0: # %test_entry
 ; CHECK-P7-NEXT:    mflr r0
 ; CHECK-P7-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P7-NEXT:    std r0, 16(r1)
 ; CHECK-P7-NEXT:    stdu r1, -48(r1)
+; CHECK-P7-NEXT:    std r0, 64(r1)
 ; CHECK-P7-NEXT:    mr r30, r3
 ; CHECK-P7-NEXT:    bl dummy
 ; CHECK-P7-NEXT:    nop
@@ -629,8 +629,8 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; P8-AIX-64-LABEL: no_crash_elt0_from_RHS:
 ; P8-AIX-64:       # %bb.0: # %test_entry
 ; P8-AIX-64-NEXT:    mflr r0
-; P8-AIX-64-NEXT:    std r0, 16(r1)
 ; P8-AIX-64-NEXT:    stdu r1, -128(r1)
+; P8-AIX-64-NEXT:    std r0, 144(r1)
 ; P8-AIX-64-NEXT:    std r31, 120(r1) # 8-byte Folded Spill
 ; P8-AIX-64-NEXT:    mr r31, r3
 ; P8-AIX-64-NEXT:    bl .dummy[PR]
@@ -643,8 +643,8 @@ define dso_local void @no_crash_elt0_from_RHS(ptr noalias nocapture dereferencea
 ; P8-AIX-32-LABEL: no_crash_elt0_from_RHS:
 ; P8-AIX-32:       # %bb.0: # %test_entry
 ; P8-AIX-32-NEXT:    mflr r0
-; P8-AIX-32-NEXT:    stw r0, 8(r1)
 ; P8-AIX-32-NEXT:    stwu r1, -64(r1)
+; P8-AIX-32-NEXT:    stw r0, 72(r1)
 ; P8-AIX-32-NEXT:    stw r31, 60(r1) # 4-byte Folded Spill
 ; P8-AIX-32-NEXT:    mr r31, r3
 ; P8-AIX-32-NEXT:    bl .dummy[PR]

diff  --git a/llvm/test/CodeGen/PowerPC/constant-pool.ll b/llvm/test/CodeGen/PowerPC/constant-pool.ll
index 4185a41b50f24..a9feb93627b06 100644
--- a/llvm/test/CodeGen/PowerPC/constant-pool.ll
+++ b/llvm/test/CodeGen/PowerPC/constant-pool.ll
@@ -389,8 +389,8 @@ define ppc_fp128 @three_constants_ppcf128(ppc_fp128 %a, ppc_fp128 %c) {
 ; CHECK-P9-LABEL: three_constants_ppcf128:
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    mflr r0
-; CHECK-P9-NEXT:    std r0, 16(r1)
 ; CHECK-P9-NEXT:    stdu r1, -32(r1)
+; CHECK-P9-NEXT:    std r0, 48(r1)
 ; CHECK-P9-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P9-NEXT:    .cfi_offset lr, 16
 ; CHECK-P9-NEXT:    addis r3, r2, .LCPI16_0@toc@ha

diff  --git a/llvm/test/CodeGen/PowerPC/csr-split.ll b/llvm/test/CodeGen/PowerPC/csr-split.ll
index 66ea981822ab2..e3ba42a50fb7b 100644
--- a/llvm/test/CodeGen/PowerPC/csr-split.ll
+++ b/llvm/test/CodeGen/PowerPC/csr-split.ll
@@ -16,10 +16,10 @@ define dso_local signext i32 @test1(ptr %b) local_unnamed_addr  {
 ; CHECK-PWR9-NEXT:    .cfi_offset lr, 16
 ; CHECK-PWR9-NEXT:    .cfi_offset r30, -16
 ; CHECK-PWR9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-PWR9-NEXT:    std r0, 16(r1)
 ; CHECK-PWR9-NEXT:    stdu r1, -48(r1)
 ; CHECK-PWR9-NEXT:    mr r30, r3
 ; CHECK-PWR9-NEXT:    addis r3, r2, a@toc@ha
+; CHECK-PWR9-NEXT:    std r0, 64(r1)
 ; CHECK-PWR9-NEXT:    lwa r3, a@toc@l(r3)
 ; CHECK-PWR9-NEXT:    cmpld r3, r30
 ; CHECK-PWR9-NEXT:    # implicit-def: $r3
@@ -41,8 +41,8 @@ define dso_local signext i32 @test1(ptr %b) local_unnamed_addr  {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -128(r1)
+; CHECK-NEXT:    std r0, 144(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset r30, -16
@@ -95,10 +95,10 @@ define dso_local signext i32 @test2(ptr %p1) local_unnamed_addr  {
 ; CHECK-PWR9-NEXT:    .cfi_offset lr, 16
 ; CHECK-PWR9-NEXT:    .cfi_offset r30, -16
 ; CHECK-PWR9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-PWR9-NEXT:    std r0, 16(r1)
 ; CHECK-PWR9-NEXT:    stdu r1, -48(r1)
 ; CHECK-PWR9-NEXT:    mr r30, r3
 ; CHECK-PWR9-NEXT:    li r3, 0
+; CHECK-PWR9-NEXT:    std r0, 64(r1)
 ; CHECK-PWR9-NEXT:    cmpldi r30, 0
 ; CHECK-PWR9-NEXT:    beq cr0, .LBB1_3
 ; CHECK-PWR9-NEXT:  # %bb.1: # %if.end
@@ -122,8 +122,8 @@ define dso_local signext i32 @test2(ptr %p1) local_unnamed_addr  {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -128(r1)
+; CHECK-NEXT:    std r0, 144(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset r30, -16
@@ -181,8 +181,8 @@ define dso_local ptr @test3(ptr nocapture %p1, i8 zeroext %p2) local_unnamed_add
 ; CHECK-PWR9-NEXT:    .cfi_offset r30, -16
 ; CHECK-PWR9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-PWR9-NEXT:    std r0, 16(r1)
 ; CHECK-PWR9-NEXT:    stdu r1, -64(r1)
+; CHECK-PWR9-NEXT:    std r0, 80(r1)
 ; CHECK-PWR9-NEXT:    ld r30, 0(r3)
 ; CHECK-PWR9-NEXT:    cmpldi r30, 0
 ; CHECK-PWR9-NEXT:    beq cr0, .LBB2_2
@@ -204,8 +204,8 @@ define dso_local ptr @test3(ptr nocapture %p1, i8 zeroext %p2) local_unnamed_add
 ; CHECK-LABEL: test3:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -144(r1)
+; CHECK-NEXT:    std r0, 160(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 144
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset r29, -24

diff  --git a/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll b/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
index 7bc80a0343f4a..d872a9e06106b 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
@@ -12,10 +12,10 @@ define void @test(ptr %cast) {
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -64(1)
 ; CHECK-NEXT:    li 30, 0
 ; CHECK-NEXT:    addi 29, 3, -8
+; CHECK-NEXT:    std 0, 80(1)
 ; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB0_1: # %for.body
 ; CHECK-NEXT:    #

diff  --git a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
index 3c9f39ff76682..bc57d26402596 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
@@ -36,19 +36,19 @@ define void @fmul_ctrloop_fp128() nounwind {
 ; PWR8-LABEL: fmul_ctrloop_fp128:
 ; PWR8:       # %bb.0: # %entry
 ; PWR8-NEXT:    mflr 0
-; PWR8-NEXT:    std 0, 16(1)
 ; PWR8-NEXT:    stdu 1, -112(1)
 ; PWR8-NEXT:    li 3, 48
+; PWR8-NEXT:    std 0, 128(1)
 ; PWR8-NEXT:    addis 4, 2, x@toc@ha
 ; PWR8-NEXT:    std 28, 80(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    std 29, 88(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    std 30, 96(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    li 30, 0
-; PWR8-NEXT:    li 29, 16
 ; PWR8-NEXT:    addi 4, 4, x@toc@l
-; PWR8-NEXT:    std 26, 64(1) # 8-byte Folded Spill
+; PWR8-NEXT:    li 29, 16
 ; PWR8-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PWR8-NEXT:    addis 3, 2, a@toc@ha
+; PWR8-NEXT:    std 26, 64(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    std 27, 72(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    addi 3, 3, a@toc@l
 ; PWR8-NEXT:    lxvd2x 0, 0, 3
@@ -133,11 +133,11 @@ define void @fpext_ctrloop_fp128(ptr %a) nounwind {
 ; PWR8-NEXT:    std 28, -32(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; PWR8-NEXT:    std 0, 16(1)
 ; PWR8-NEXT:    stdu 1, -64(1)
 ; PWR8-NEXT:    addis 4, 2, y@toc@ha
 ; PWR8-NEXT:    addi 30, 3, -8
 ; PWR8-NEXT:    li 28, 0
+; PWR8-NEXT:    std 0, 80(1)
 ; PWR8-NEXT:    addi 4, 4, y@toc@l
 ; PWR8-NEXT:    addi 29, 4, -16
 ; PWR8-NEXT:    .p2align 4
@@ -206,11 +206,11 @@ define void @fptrunc_ctrloop_fp128(ptr %a) nounwind {
 ; PWR8-NEXT:    std 28, -32(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; PWR8-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; PWR8-NEXT:    std 0, 16(1)
 ; PWR8-NEXT:    stdu 1, -64(1)
 ; PWR8-NEXT:    addis 4, 2, x@toc@ha
 ; PWR8-NEXT:    addi 30, 3, -8
 ; PWR8-NEXT:    li 28, 0
+; PWR8-NEXT:    std 0, 80(1)
 ; PWR8-NEXT:    addi 4, 4, x@toc@l
 ; PWR8-NEXT:    addi 29, 4, -16
 ; PWR8-NEXT:    .p2align 4

diff  --git a/llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll b/llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll
index 93082fef0aa6d..db459598babc9 100644
--- a/llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll
+++ b/llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll
@@ -16,8 +16,8 @@ define cxx_fast_tlscc nonnull ptr @_ZTW2sg() nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -48(1)
+; CHECK-NEXT:    std 0, 64(1)
 ; CHECK-NEXT:    addis 3, 13, __tls_guard@tprel@ha
 ; CHECK-NEXT:    lbz 4, __tls_guard@tprel@l(3)
 ; CHECK-NEXT:    andi. 4, 4, 1

diff  --git a/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll b/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll
index fef538c365130..19f8f01d35538 100644
--- a/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll
+++ b/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll
@@ -11,40 +11,11 @@
 declare ppc_fp128 @llvm.fmuladd.ppcf128(ppc_fp128, ppc_fp128, ppc_fp128) #2
 
 define ppc_fp128 @test_ctr0() {
-; LE-LABEL: test_ctr0:
-; LE:       # %bb.0: # %bb
-; LE-NEXT:    mflr r0
-; LE-NEXT:    .cfi_def_cfa_offset 48
-; LE-NEXT:    .cfi_offset lr, 16
-; LE-NEXT:    .cfi_offset r30, -16
-; LE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; LE-NEXT:    std r0, 16(r1)
-; LE-NEXT:    stdu r1, -48(r1)
-; LE-NEXT:    xxlxor f1, f1, f1
-; LE-NEXT:    li r30, 0
-; LE-NEXT:    xxlxor f2, f2, f2
-; LE-NEXT:    .p2align 5
-; LE-NEXT:  .LBB0_1: # %bb6
-; LE-NEXT:    #
-; LE-NEXT:    xxlxor f3, f3, f3
-; LE-NEXT:    xxlxor f4, f4, f4
-; LE-NEXT:    bl __gcc_qadd
-; LE-NEXT:    nop
-; LE-NEXT:    addi r30, r30, 4
-; LE-NEXT:    cmpldi r30, 0
-; LE-NEXT:    bne cr0, .LBB0_1
-; LE-NEXT:  # %bb.2: # %bb14
-; LE-NEXT:    addi r1, r1, 48
-; LE-NEXT:    ld r0, 16(r1)
-; LE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; LE-NEXT:    mtlr r0
-; LE-NEXT:    blr
-;
 ; P9BE-LABEL: test_ctr0:
 ; P9BE:       # %bb.0: # %bb
 ; P9BE-NEXT:    mflr r0
-; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -128(r1)
+; P9BE-NEXT:    std r0, 144(r1)
 ; P9BE-NEXT:    .cfi_def_cfa_offset 128
 ; P9BE-NEXT:    .cfi_offset lr, 16
 ; P9BE-NEXT:    .cfi_offset r30, -16
@@ -72,8 +43,8 @@ define ppc_fp128 @test_ctr0() {
 ; P8BE-LABEL: test_ctr0:
 ; P8BE:       # %bb.0: # %bb
 ; P8BE-NEXT:    mflr r0
-; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -128(r1)
+; P8BE-NEXT:    std r0, 144(r1)
 ; P8BE-NEXT:    .cfi_def_cfa_offset 128
 ; P8BE-NEXT:    .cfi_offset lr, 16
 ; P8BE-NEXT:    .cfi_offset r30, -16
@@ -111,3 +82,5 @@ bb6:                                              ; preds = %bb6, %bb
 bb14:                                             ; preds = %bb6
   ret ppc_fp128 %i8
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; LE: {{.*}}

diff  --git a/llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll b/llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll
index 9eaffb4947925..3eb33104d544f 100644
--- a/llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll
+++ b/llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll
@@ -11,8 +11,8 @@ define void @call_test_byval_mem1() #0 {
 ; CHECK-LABEL: call_test_byval_mem1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
@@ -45,8 +45,8 @@ define void @call_test_byval_mem1_2() #0 {
 ; CHECK-LABEL: call_test_byval_mem1_2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -112(1)
+; CHECK-NEXT:    std 0, 128(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
@@ -86,8 +86,8 @@ define void @call_test_byval_mem1_3() #0 {
 ; CHECK-LABEL: call_test_byval_mem1_3:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
@@ -126,8 +126,8 @@ define void @call_test_byval_mem1_4() #0 {
 ; CHECK-LABEL: call_test_byval_mem1_4:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -112(1)
+; CHECK-NEXT:    std 0, 128(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
@@ -168,8 +168,8 @@ define void @call_test_byval_mem1_5() #0 {
 ; CHECK-LABEL: call_test_byval_mem1_5:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
@@ -211,8 +211,8 @@ define void @call_test_byval_mem2() #0 {
 ; CHECK-LABEL: call_test_byval_mem2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC1@toc@ha
@@ -247,8 +247,8 @@ define void @call_test_byval_mem3() #0 {
 ; CHECK-LABEL: call_test_byval_mem3:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC2@toc@ha
@@ -290,8 +290,8 @@ define void @call_test_byval_mem4() #0 {
 ; CHECK-LABEL: call_test_byval_mem4:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC3@toc@ha
@@ -326,8 +326,8 @@ define void @call_test_byval_mem8() #0 {
 ; CHECK-LABEL: call_test_byval_mem8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC4@toc@ha
@@ -363,8 +363,8 @@ define void @call_test_byval_mem32() #0 {
 ; CHECK-LABEL: call_test_byval_mem32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC5@toc@ha
@@ -403,8 +403,8 @@ define void @call_test_byval_mem32_2() #0 {
 ; CHECK-LABEL: call_test_byval_mem32_2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC5@toc@ha
@@ -444,8 +444,8 @@ define void @call_test_byval_mem32_3() #0 {
 ; CHECK-LABEL: call_test_byval_mem32_3:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -112(1)
+; CHECK-NEXT:    std 0, 128(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC5@toc@ha
@@ -498,8 +498,8 @@ define void @call_test_byval_mem64() #0 {
 ; CHECK-LABEL: call_test_byval_mem64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC6@toc@ha
@@ -548,8 +548,8 @@ define void @call_test_byval_mem65() #0 {
 ; CHECK-LABEL: call_test_byval_mem65:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -112(1)
+; CHECK-NEXT:    std 0, 128(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC7@toc@ha

diff  --git a/llvm/test/CodeGen/PowerPC/expand-foldable-isel.ll b/llvm/test/CodeGen/PowerPC/expand-foldable-isel.ll
index 834d986cce0e0..b50138a936d50 100644
--- a/llvm/test/CodeGen/PowerPC/expand-foldable-isel.ll
+++ b/llvm/test/CodeGen/PowerPC/expand-foldable-isel.ll
@@ -31,11 +31,11 @@ define void @_ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot
 ; CHECK-GEN-ISEL-TRUE-NEXT:    .cfi_offset r30, -16
 ; CHECK-GEN-ISEL-TRUE-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-GEN-ISEL-TRUE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-GEN-ISEL-TRUE-NEXT:    std r0, 16(r1)
 ; CHECK-GEN-ISEL-TRUE-NEXT:    stdu r1, -64(r1)
 ; CHECK-GEN-ISEL-TRUE-NEXT:    mr r30, r3
 ; CHECK-GEN-ISEL-TRUE-NEXT:    # implicit-def: $x3
 ; CHECK-GEN-ISEL-TRUE-NEXT:    # implicit-def: $r29
+; CHECK-GEN-ISEL-TRUE-NEXT:    std r0, 80(r1)
 ; CHECK-GEN-ISEL-TRUE-NEXT:    .p2align 4
 ; CHECK-GEN-ISEL-TRUE-NEXT:  .LBB0_1: # %while.cond11
 ; CHECK-GEN-ISEL-TRUE-NEXT:    #
@@ -71,11 +71,11 @@ define void @_ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -64(r1)
 ; CHECK-NEXT:    mr r30, r3
 ; CHECK-NEXT:    # implicit-def: $x3
 ; CHECK-NEXT:    # implicit-def: $r29
+; CHECK-NEXT:    std r0, 80(r1)
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_1: # %while.cond11
 ; CHECK-NEXT:    #

diff  --git a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll
index 2e89d4c169a24..dca85a6750adf 100644
--- a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll
@@ -355,8 +355,8 @@ define fp128 @testMixedAggregate_03([4 x i128] %sa.coerce) {
 ; CHECK-P8-LABEL: testMixedAggregate_03:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -96(r1)
+; CHECK-P8-NEXT:    std r0, 112(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
@@ -596,16 +596,16 @@ define fp128 @sum_float128(i32 signext %count, ...) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
 ; CHECK-P8-NEXT:    addis r11, r2, .LCPI17_0@toc@ha
 ; CHECK-P8-NEXT:    cmpwi r3, 0
+; CHECK-P8-NEXT:    std r0, 80(r1)
 ; CHECK-P8-NEXT:    std r4, 104(r1)
 ; CHECK-P8-NEXT:    std r5, 112(r1)
 ; CHECK-P8-NEXT:    std r6, 120(r1)
-; CHECK-P8-NEXT:    std r7, 128(r1)
 ; CHECK-P8-NEXT:    addi r11, r11, .LCPI17_0@toc@l
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r11
+; CHECK-P8-NEXT:    std r7, 128(r1)
 ; CHECK-P8-NEXT:    std r8, 136(r1)
 ; CHECK-P8-NEXT:    std r9, 144(r1)
 ; CHECK-P8-NEXT:    std r10, 152(r1)

diff  --git a/llvm/test/CodeGen/PowerPC/f128-arith.ll b/llvm/test/CodeGen/PowerPC/f128-arith.ll
index 37e5cd8567ed3..bc1c7399bef13 100644
--- a/llvm/test/CodeGen/PowerPC/f128-arith.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-arith.ll
@@ -21,10 +21,10 @@ define dso_local void @qpAdd(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    bl __addkf3
@@ -59,10 +59,10 @@ define dso_local void @qpSub(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    bl __subkf3
@@ -97,10 +97,10 @@ define dso_local void @qpMul(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    bl __mulkf3
@@ -135,10 +135,10 @@ define dso_local void @qpDiv(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    vmr v3, v2
 ; CHECK-P8-NEXT:    bl __divkf3
@@ -196,10 +196,10 @@ define dso_local void @qpSqrt(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl sqrtf128
 ; CHECK-P8-NEXT:    nop
@@ -347,8 +347,8 @@ define fp128 @qp_sin(ptr nocapture readonly %a) {
 ; CHECK-LABEL: qp_sin:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    lxv v2, 0(r3)
@@ -362,8 +362,8 @@ define fp128 @qp_sin(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qp_sin:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -385,8 +385,8 @@ define fp128 @qp_cos(ptr nocapture readonly %a) {
 ; CHECK-LABEL: qp_cos:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    lxv v2, 0(r3)
@@ -400,8 +400,8 @@ define fp128 @qp_cos(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qp_cos:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -423,8 +423,8 @@ define fp128 @qp_log(ptr nocapture readonly %a) {
 ; CHECK-LABEL: qp_log:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    lxv v2, 0(r3)
@@ -438,8 +438,8 @@ define fp128 @qp_log(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qp_log:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -461,8 +461,8 @@ define fp128 @qp_log10(ptr nocapture readonly %a) {
 ; CHECK-LABEL: qp_log10:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    lxv v2, 0(r3)
@@ -476,8 +476,8 @@ define fp128 @qp_log10(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qp_log10:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -499,8 +499,8 @@ define fp128 @qp_log2(ptr nocapture readonly %a) {
 ; CHECK-LABEL: qp_log2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    lxv v2, 0(r3)
@@ -514,8 +514,8 @@ define fp128 @qp_log2(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qp_log2:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -537,8 +537,8 @@ define fp128 @qp_minnum(ptr nocapture readonly %a,
 ; CHECK-LABEL: qp_minnum:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    lxv v2, 0(r3)
@@ -553,8 +553,8 @@ define fp128 @qp_minnum(ptr nocapture readonly %a,
 ; CHECK-P8-LABEL: qp_minnum:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -580,8 +580,8 @@ define fp128 @qp_maxnum(ptr nocapture readonly %a,
 ; CHECK-LABEL: qp_maxnum:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    lxv v2, 0(r3)
@@ -596,8 +596,8 @@ define fp128 @qp_maxnum(ptr nocapture readonly %a,
 ; CHECK-P8-LABEL: qp_maxnum:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -623,8 +623,8 @@ define fp128 @qp_pow(ptr nocapture readonly %a,
 ; CHECK-LABEL: qp_pow:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    lxv v2, 0(r3)
@@ -639,8 +639,8 @@ define fp128 @qp_pow(ptr nocapture readonly %a,
 ; CHECK-P8-LABEL: qp_pow:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -666,8 +666,8 @@ define fp128 @qp_exp(ptr nocapture readonly %a) {
 ; CHECK-LABEL: qp_exp:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    lxv v2, 0(r3)
@@ -681,8 +681,8 @@ define fp128 @qp_exp(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qp_exp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -704,8 +704,8 @@ define fp128 @qp_exp2(ptr nocapture readonly %a) {
 ; CHECK-LABEL: qp_exp2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    lxv v2, 0(r3)
@@ -719,8 +719,8 @@ define fp128 @qp_exp2(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qp_exp2:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -746,10 +746,10 @@ define dso_local void @qp_powi(ptr nocapture readonly %a, ptr nocapture readonly
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -48(r1)
-; CHECK-NEXT:    lxv v2, 0(r3)
+; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    mr r30, r5
+; CHECK-NEXT:    lxv v2, 0(r3)
 ; CHECK-NEXT:    lwz r5, 0(r4)
 ; CHECK-NEXT:    bl __powikf2
 ; CHECK-NEXT:    nop
@@ -767,11 +767,11 @@ define dso_local void @qp_powi(ptr nocapture readonly %a, ptr nocapture readonly
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lwz r3, 0(r4)
-; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    mr r5, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl __powikf2
@@ -800,8 +800,8 @@ define fp128 @qp_frem() #0 {
 ; CHECK-LABEL: qp_frem:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis r3, r2, a@toc@ha
@@ -820,8 +820,8 @@ define fp128 @qp_frem() #0 {
 ; CHECK-P8-LABEL: qp_frem:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a@toc@ha
@@ -860,10 +860,10 @@ define dso_local void @qpCeil(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl ceilf128
 ; CHECK-P8-NEXT:    nop
@@ -897,10 +897,10 @@ define dso_local void @qpFloor(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl floorf128
 ; CHECK-P8-NEXT:    nop
@@ -934,10 +934,10 @@ define dso_local void @qpTrunc(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl truncf128
 ; CHECK-P8-NEXT:    nop
@@ -971,10 +971,10 @@ define dso_local void @qpRound(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl roundf128
 ; CHECK-P8-NEXT:    nop
@@ -1001,10 +1001,10 @@ define dso_local void @qpLRound(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -48(r1)
-; CHECK-NEXT:    lxv v2, 0(r3)
+; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    mr r30, r4
+; CHECK-NEXT:    lxv v2, 0(r3)
 ; CHECK-NEXT:    bl lroundf128
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    stw r3, 0(r30)
@@ -1021,10 +1021,10 @@ define dso_local void @qpLRound(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl lroundf128
 ; CHECK-P8-NEXT:    nop
@@ -1050,10 +1050,10 @@ define dso_local void @qpLLRound(ptr nocapture readonly %a, ptr nocapture %res)
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -48(r1)
-; CHECK-NEXT:    lxv v2, 0(r3)
+; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    mr r30, r4
+; CHECK-NEXT:    lxv v2, 0(r3)
 ; CHECK-NEXT:    bl llroundf128
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    std r3, 0(r30)
@@ -1070,10 +1070,10 @@ define dso_local void @qpLLRound(ptr nocapture readonly %a, ptr nocapture %res)
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl llroundf128
 ; CHECK-P8-NEXT:    nop
@@ -1106,10 +1106,10 @@ define dso_local void @qpRint(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl rintf128
 ; CHECK-P8-NEXT:    nop
@@ -1136,10 +1136,10 @@ define dso_local void @qpLRint(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -48(r1)
-; CHECK-NEXT:    lxv v2, 0(r3)
+; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    mr r30, r4
+; CHECK-NEXT:    lxv v2, 0(r3)
 ; CHECK-NEXT:    bl lrintf128
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    stw r3, 0(r30)
@@ -1156,10 +1156,10 @@ define dso_local void @qpLRint(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl lrintf128
 ; CHECK-P8-NEXT:    nop
@@ -1185,10 +1185,10 @@ define dso_local void @qpLLRint(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -48(r1)
-; CHECK-NEXT:    lxv v2, 0(r3)
+; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    mr r30, r4
+; CHECK-NEXT:    lxv v2, 0(r3)
 ; CHECK-NEXT:    bl llrintf128
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    std r3, 0(r30)
@@ -1205,10 +1205,10 @@ define dso_local void @qpLLRint(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl llrintf128
 ; CHECK-P8-NEXT:    nop
@@ -1241,10 +1241,10 @@ define dso_local void @qpNearByInt(ptr nocapture readonly %a, ptr nocapture %res
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl nearbyintf128
 ; CHECK-P8-NEXT:    nop
@@ -1280,11 +1280,11 @@ define dso_local void @qpFMA(ptr %a, ptr %b, ptr %c, ptr %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    mr r30, r6
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    mr r30, r6
 ; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1

diff  --git a/llvm/test/CodeGen/PowerPC/f128-branch-cond.ll b/llvm/test/CodeGen/PowerPC/f128-branch-cond.ll
index 415645599fbc1..75c2bc7bc0fa0 100644
--- a/llvm/test/CodeGen/PowerPC/f128-branch-cond.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-branch-cond.ll
@@ -8,8 +8,8 @@ define i32 @test_choice1(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: test_choice1:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr 0
-; P8-NEXT:    std 0, 16(1)
 ; P8-NEXT:    stdu 1, -112(1)
+; P8-NEXT:    std 0, 128(1)
 ; P8-NEXT:    bl __nekf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    # kill: def $r3 killed $r3 killed $x3
@@ -36,8 +36,8 @@ define i32 @test_choice1(fp128 %a, fp128 %b) #0 {
 ; P9-LABEL: test_choice1:
 ; P9:       # %bb.0: # %entry
 ; P9-NEXT:    mflr 0
-; P9-NEXT:    std 0, 16(1)
 ; P9-NEXT:    stdu 1, -112(1)
+; P9-NEXT:    std 0, 128(1)
 ; P9-NEXT:    xscmpuqp 0, 2, 3
 ; P9-NEXT:    bne 0, .LBB0_2
 ; P9-NEXT:    b .LBB0_1
@@ -78,8 +78,8 @@ define i32 @test_choice2(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: test_choice2:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr 0
-; P8-NEXT:    std 0, 16(1)
 ; P8-NEXT:    stdu 1, -112(1)
+; P8-NEXT:    std 0, 128(1)
 ; P8-NEXT:    bl __lekf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    # kill: def $r3 killed $r3 killed $x3
@@ -106,8 +106,8 @@ define i32 @test_choice2(fp128 %a, fp128 %b) #0 {
 ; P9-LABEL: test_choice2:
 ; P9:       # %bb.0: # %entry
 ; P9-NEXT:    mflr 0
-; P9-NEXT:    std 0, 16(1)
 ; P9-NEXT:    stdu 1, -112(1)
+; P9-NEXT:    std 0, 128(1)
 ; P9-NEXT:    xscmpuqp 0, 2, 3
 ; P9-NEXT:    crmove 20, 3
 ; P9-NEXT:    crnot 21, 20
@@ -155,8 +155,8 @@ define i32 @test_choice3(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: test_choice3:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr 0
-; P8-NEXT:    std 0, 16(1)
 ; P8-NEXT:    stdu 1, -112(1)
+; P8-NEXT:    std 0, 128(1)
 ; P8-NEXT:    bl __ltkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    # kill: def $r3 killed $r3 killed $x3
@@ -183,8 +183,8 @@ define i32 @test_choice3(fp128 %a, fp128 %b) #0 {
 ; P9-LABEL: test_choice3:
 ; P9:       # %bb.0: # %entry
 ; P9-NEXT:    mflr 0
-; P9-NEXT:    std 0, 16(1)
 ; P9-NEXT:    stdu 1, -112(1)
+; P9-NEXT:    std 0, 128(1)
 ; P9-NEXT:    xscmpuqp 0, 2, 3
 ; P9-NEXT:    bge 0, .LBB2_2
 ; P9-NEXT:    b .LBB2_1
@@ -225,8 +225,8 @@ define i32 @test_choice4(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: test_choice4:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr 0
-; P8-NEXT:    std 0, 16(1)
 ; P8-NEXT:    stdu 1, -112(1)
+; P8-NEXT:    std 0, 128(1)
 ; P8-NEXT:    bl __eqkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    # kill: def $r3 killed $r3 killed $x3
@@ -253,8 +253,8 @@ define i32 @test_choice4(fp128 %a, fp128 %b) #0 {
 ; P9-LABEL: test_choice4:
 ; P9:       # %bb.0: # %entry
 ; P9-NEXT:    mflr 0
-; P9-NEXT:    std 0, 16(1)
 ; P9-NEXT:    stdu 1, -112(1)
+; P9-NEXT:    std 0, 128(1)
 ; P9-NEXT:    xscmpuqp 0, 2, 3
 ; P9-NEXT:    beq 0, .LBB3_2
 ; P9-NEXT:    b .LBB3_1

diff  --git a/llvm/test/CodeGen/PowerPC/f128-compare.ll b/llvm/test/CodeGen/PowerPC/f128-compare.ll
index 3be75f6ca6dc6..8eb8ae64c0aa6 100644
--- a/llvm/test/CodeGen/PowerPC/f128-compare.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-compare.ll
@@ -27,8 +27,8 @@ define dso_local signext i32 @greater_qp() {
 ; CHECK-P8-LABEL: greater_qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
@@ -75,8 +75,8 @@ define dso_local signext i32 @less_qp() {
 ; CHECK-P8-LABEL: less_qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
@@ -121,8 +121,8 @@ define dso_local signext i32 @greater_eq_qp() {
 ; CHECK-P8-LABEL: greater_eq_qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
@@ -168,8 +168,8 @@ define dso_local signext i32 @less_eq_qp() {
 ; CHECK-P8-LABEL: less_eq_qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
@@ -217,8 +217,8 @@ define dso_local signext i32 @equal_qp() {
 ; CHECK-P8-LABEL: equal_qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
@@ -263,8 +263,8 @@ define dso_local signext i32 @not_greater_qp() {
 ; CHECK-P8-LABEL: not_greater_qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
@@ -312,8 +312,8 @@ define dso_local signext i32 @not_less_qp() {
 ; CHECK-P8-LABEL: not_less_qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
@@ -360,8 +360,8 @@ define dso_local signext i32 @not_greater_eq_qp() {
 ; CHECK-P8-LABEL: not_greater_eq_qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
@@ -407,8 +407,8 @@ define dso_local signext i32 @not_less_eq_qp() {
 ; CHECK-P8-LABEL: not_less_eq_qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
@@ -455,8 +455,8 @@ define dso_local signext i32 @not_equal_qp() {
 ; CHECK-P8-LABEL: not_equal_qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r3, r2, a_qp@toc@ha
@@ -503,8 +503,8 @@ define fp128 @greater_sel_qp() {
 ; CHECK-P8-LABEL: greater_sel_qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
+; CHECK-P8-NEXT:    std r0, 96(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset v30, -32
@@ -566,8 +566,8 @@ define fp128 @less_sel_qp() {
 ; CHECK-P8-LABEL: less_sel_qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
+; CHECK-P8-NEXT:    std r0, 96(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset v30, -32
@@ -630,8 +630,8 @@ define fp128 @greater_eq_sel_qp() {
 ; CHECK-P8-LABEL: greater_eq_sel_qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
+; CHECK-P8-NEXT:    std r0, 96(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset v30, -32
@@ -694,8 +694,8 @@ define fp128 @less_eq_sel_qp() {
 ; CHECK-P8-LABEL: less_eq_sel_qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
+; CHECK-P8-NEXT:    std r0, 96(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset v30, -32
@@ -757,8 +757,8 @@ define fp128 @equal_sel_qp() {
 ; CHECK-P8-LABEL: equal_sel_qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
+; CHECK-P8-NEXT:    std r0, 96(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset v30, -32

diff  --git a/llvm/test/CodeGen/PowerPC/f128-conv.ll b/llvm/test/CodeGen/PowerPC/f128-conv.ll
index d628ce0d55e59..9802de15dcde6 100644
--- a/llvm/test/CodeGen/PowerPC/f128-conv.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-conv.ll
@@ -29,10 +29,10 @@ define void @sdwConv2qp(ptr nocapture %a, i64 %b) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    bl __floatdikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -58,11 +58,11 @@ define void @sdwConv2qp_01(ptr nocapture %a, i128 %b) {
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -48(r1)
 ; CHECK-NEXT:    mr r30, r3
 ; CHECK-NEXT:    mr r3, r4
 ; CHECK-NEXT:    mr r4, r5
+; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    bl __floattikf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    stxv v2, 0(r30)
@@ -79,11 +79,11 @@ define void @sdwConv2qp_01(ptr nocapture %a, i128 %b) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    mr r4, r5
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    bl __floattikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -118,9 +118,9 @@ define void @sdwConv2qp_02(ptr nocapture %a) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
 ; CHECK-P8-NEXT:    ld r4, 16(r4)
@@ -159,10 +159,10 @@ define void @sdwConv2qp_03(ptr nocapture %a, ptr nocapture readonly %b) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    ld r4, 0(r4)
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
+; CHECK-P8-NEXT:    ld r4, 0(r4)
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatdikf
 ; CHECK-P8-NEXT:    nop
@@ -201,11 +201,11 @@ define void @sdwConv2qp_04(ptr nocapture %a, i1 %b) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    andi. r3, r4, 1
 ; CHECK-P8-NEXT:    li r4, -1
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    li r3, 0
 ; CHECK-P8-NEXT:    iselgt r3, r4, r3
 ; CHECK-P8-NEXT:    bl __floatsikf
@@ -240,10 +240,10 @@ define void @udwConv2qp(ptr nocapture %a, i64 %b) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    bl __floatundikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -269,11 +269,11 @@ define void @udwConv2qp_01(ptr nocapture %a, i128 %b) {
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -48(r1)
 ; CHECK-NEXT:    mr r30, r3
 ; CHECK-NEXT:    mr r3, r4
 ; CHECK-NEXT:    mr r4, r5
+; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    bl __floatuntikf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    stxv v2, 0(r30)
@@ -290,11 +290,11 @@ define void @udwConv2qp_01(ptr nocapture %a, i128 %b) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    mr r4, r5
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    bl __floatuntikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -329,9 +329,9 @@ define void @udwConv2qp_02(ptr nocapture %a) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    addis r4, r2, .LC1@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC1@toc@l(r4)
 ; CHECK-P8-NEXT:    ld r4, 32(r4)
@@ -370,10 +370,10 @@ define void @udwConv2qp_03(ptr nocapture %a, ptr nocapture readonly %b) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    ld r4, 0(r4)
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
+; CHECK-P8-NEXT:    ld r4, 0(r4)
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatundikf
 ; CHECK-P8-NEXT:    nop
@@ -409,10 +409,10 @@ define void @udwConv2qp_04(ptr nocapture %a, i1 %b) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    clrldi r3, r4, 63
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -447,9 +447,9 @@ define ptr @sdwConv2qp_testXForm(ptr returned %sink,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    lis r5, 1
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ori r5, r5, 7797
 ; CHECK-P8-NEXT:    ldx r4, r4, r5
@@ -492,9 +492,9 @@ define ptr @udwConv2qp_testXForm(ptr returned %sink,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    lis r5, 1
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ori r5, r5, 7797
 ; CHECK-P8-NEXT:    ldx r4, r4, r5
@@ -535,10 +535,10 @@ define void @swConv2qp(ptr nocapture %a, i32 signext %b) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -571,10 +571,10 @@ define void @swConv2qp_02(ptr nocapture %a, ptr nocapture readonly %b) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lwa r4, 0(r4)
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
+; CHECK-P8-NEXT:    lwa r4, 0(r4)
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatsikf
 ; CHECK-P8-NEXT:    nop
@@ -612,9 +612,9 @@ define void @swConv2qp_03(ptr nocapture %a) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    addis r4, r2, .LC2@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC2@toc@l(r4)
 ; CHECK-P8-NEXT:    lwa r4, 12(r4)
@@ -653,10 +653,10 @@ define void @uwConv2qp(ptr nocapture %a, i32 zeroext %b) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -689,10 +689,10 @@ define void @uwConv2qp_02(ptr nocapture %a, ptr nocapture readonly %b) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lwz r4, 0(r4)
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
+; CHECK-P8-NEXT:    lwz r4, 0(r4)
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
@@ -730,9 +730,9 @@ define void @uwConv2qp_03(ptr nocapture %a) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    addis r4, r2, .LC3@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC3@toc@l(r4)
 ; CHECK-P8-NEXT:    lwz r4, 12(r4)
@@ -773,8 +773,8 @@ define void @uwConv2qp_04(ptr nocapture %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    lwz r3, 0(r5)
 ; CHECK-P8-NEXT:    add r3, r3, r4
@@ -814,10 +814,10 @@ define void @uhwConv2qp(ptr nocapture %a, i16 zeroext %b) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -851,10 +851,10 @@ define void @uhwConv2qp_02(ptr nocapture %a, ptr nocapture readonly %b) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lhz r4, 0(r4)
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
+; CHECK-P8-NEXT:    lhz r4, 0(r4)
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
@@ -892,9 +892,9 @@ define void @uhwConv2qp_03(ptr nocapture %a) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    addis r4, r2, .LC4@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC4@toc@l(r4)
 ; CHECK-P8-NEXT:    lhz r4, 6(r4)
@@ -935,8 +935,8 @@ define void @uhwConv2qp_04(ptr nocapture %a, i16 zeroext %b,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    lhz r3, 0(r5)
 ; CHECK-P8-NEXT:    add r3, r3, r4
@@ -978,10 +978,10 @@ define void @ubConv2qp(ptr nocapture %a, i8 zeroext %b) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    mr r3, r4
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
@@ -1014,10 +1014,10 @@ define void @ubConv2qp_02(ptr nocapture %a, ptr nocapture readonly %b) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lbz r4, 0(r4)
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
+; CHECK-P8-NEXT:    lbz r4, 0(r4)
 ; CHECK-P8-NEXT:    mr r3, r4
 ; CHECK-P8-NEXT:    bl __floatunsikf
 ; CHECK-P8-NEXT:    nop
@@ -1055,9 +1055,9 @@ define void @ubConv2qp_03(ptr nocapture %a) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    addis r4, r2, .LC5@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC5@toc@l(r4)
 ; CHECK-P8-NEXT:    lbz r4, 2(r4)
@@ -1098,8 +1098,8 @@ define void @ubConv2qp_04(ptr nocapture %a, i8 zeroext %b,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    lbz r3, 0(r5)
 ; CHECK-P8-NEXT:    add r3, r3, r4
@@ -1146,8 +1146,8 @@ define double @qpConv2dp(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2dp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -1182,9 +1182,9 @@ define void @qpConv2dp_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    addis r4, r2, .LC6@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC6@toc@l(r4)
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
@@ -1225,10 +1225,10 @@ define void @qpConv2dp_03(ptr nocapture %res, i32 signext %idx) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LC7@toc@ha
+; CHECK-P8-NEXT:    std r0, 80(r1)
 ; CHECK-P8-NEXT:    mr r29, r3
 ; CHECK-P8-NEXT:    ld r4, .LC7@toc@l(r4)
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
@@ -1270,11 +1270,11 @@ define void @qpConv2dp_04(ptr nocapture readonly %a, ptr nocapture readonly %b,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
@@ -1310,8 +1310,8 @@ define float @qpConv2sp(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2sp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -1347,9 +1347,9 @@ define void @qpConv2sp_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    addis r4, r2, .LC6@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC6@toc@l(r4)
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
@@ -1391,10 +1391,10 @@ define void @qpConv2sp_03(ptr nocapture %res, i32 signext %idx) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LC7@toc@ha
+; CHECK-P8-NEXT:    std r0, 80(r1)
 ; CHECK-P8-NEXT:    mr r29, r3
 ; CHECK-P8-NEXT:    ld r4, .LC7@toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 48
@@ -1438,11 +1438,11 @@ define void @qpConv2sp_04(ptr nocapture readonly %a, ptr nocapture readonly %b,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
@@ -1477,8 +1477,8 @@ define fp128 @dpConv2qp(double %a) {
 ; CHECK-P8-LABEL: dpConv2qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    bl __extenddfkf2
@@ -1506,8 +1506,8 @@ define void @dpConv2qp_02(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: dpConv2qp_02:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lfd f1, 0(r3)
@@ -1543,8 +1543,8 @@ define void @dpConv2qp_02b(ptr nocapture readonly %a, i32 signext %idx) {
 ; CHECK-P8-LABEL: dpConv2qp_02b:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    sldi r4, r4, 3
@@ -1587,8 +1587,8 @@ define void @dpConv2qp_03(ptr nocapture %res, i32 signext %idx, double %a) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
+; CHECK-P8-NEXT:    std r0, 80(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    mr r29, r3
 ; CHECK-P8-NEXT:    bl __extenddfkf2
@@ -1626,8 +1626,8 @@ define void @dpConv2qp_04(double %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    bl __extenddfkf2
 ; CHECK-P8-NEXT:    nop
@@ -1655,8 +1655,8 @@ define fp128 @spConv2qp(float %a) {
 ; CHECK-P8-LABEL: spConv2qp:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    bl __extendsfkf2
@@ -1684,8 +1684,8 @@ define void @spConv2qp_02(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: spConv2qp_02:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lfs f1, 0(r3)
@@ -1721,8 +1721,8 @@ define void @spConv2qp_02b(ptr nocapture readonly %a, i32 signext %idx) {
 ; CHECK-P8-LABEL: spConv2qp_02b:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    sldi r4, r4, 2
@@ -1765,8 +1765,8 @@ define void @spConv2qp_03(ptr nocapture %res, i32 signext %idx, float %a) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
+; CHECK-P8-NEXT:    std r0, 80(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    mr r29, r3
 ; CHECK-P8-NEXT:    bl __extendsfkf2
@@ -1804,8 +1804,8 @@ define void @spConv2qp_04(float %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    bl __extendsfkf2
 ; CHECK-P8-NEXT:    nop
@@ -1840,9 +1840,9 @@ define void @cvdp2sw2qp(double %val, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f0
 ; CHECK-P8-NEXT:    extsw r3, r3
@@ -1878,9 +1878,9 @@ define void @cvdp2sdw2qp(double %val, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    xscvdpsxds f0, f1
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    bl __floatdikf
@@ -1916,9 +1916,9 @@ define void @cvsp2sw2qp(float %val, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    xscvdpsxws f0, f1
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f0
 ; CHECK-P8-NEXT:    extsw r3, r3
@@ -1954,9 +1954,9 @@ define void @cvsp2sdw2qp(float %val, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    xscvdpsxds f0, f1
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    bl __floatdikf
@@ -1992,9 +1992,9 @@ define void @cvdp2uw2qp(double %val, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    xscvdpuxws f0, f1
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f0
 ; CHECK-P8-NEXT:    bl __floatunsikf
@@ -2029,9 +2029,9 @@ define void @cvdp2udw2qp(double %val, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    xscvdpuxds f0, f1
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    bl __floatundikf
@@ -2067,9 +2067,9 @@ define void @cvsp2uw2qp(float %val, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    xscvdpuxws f0, f1
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    mffprwz r3, f0
 ; CHECK-P8-NEXT:    bl __floatunsikf
@@ -2104,9 +2104,9 @@ define void @cvsp2udw2qp(float %val, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    xscvdpuxds f0, f1
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    mffprd r3, f0
 ; CHECK-P8-NEXT:    bl __floatundikf
@@ -2130,8 +2130,8 @@ define i128 @qpConv2i128(ptr nocapture readonly %a) {
 ; CHECK-LABEL: qpConv2i128:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    lxv v2, 0(r3)
@@ -2145,8 +2145,8 @@ define i128 @qpConv2i128(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2i128:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -2168,8 +2168,8 @@ define i128 @qpConv2ui128(ptr nocapture readonly %a) {
 ; CHECK-LABEL: qpConv2ui128:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    lxv v2, 0(r3)
@@ -2183,8 +2183,8 @@ define i128 @qpConv2ui128(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2ui128:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -2213,8 +2213,8 @@ define i1 @qpConv2ui1(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2ui1:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -2243,8 +2243,8 @@ define i1 @qpConv2si1(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2si1:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3

diff  --git a/llvm/test/CodeGen/PowerPC/f128-fma.ll b/llvm/test/CodeGen/PowerPC/f128-fma.ll
index 1b0581d7df524..2bfaecf12009f 100644
--- a/llvm/test/CodeGen/PowerPC/f128-fma.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-fma.ll
@@ -18,8 +18,8 @@ define void @qpFmadd(ptr nocapture readonly %a, ptr nocapture %b,
 ; CHECK-P8-LABEL: qpFmadd:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
+; CHECK-P8-NEXT:    std r0, 96(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
@@ -73,8 +73,8 @@ define void @qpFmadd_02(ptr nocapture readonly %a,
 ; CHECK-P8-LABEL: qpFmadd_02:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
+; CHECK-P8-NEXT:    std r0, 96(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
@@ -136,12 +136,12 @@ define void @qpFmadd_03(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    std r0, 80(r1)
 ; CHECK-P8-NEXT:    mr r30, r6
 ; CHECK-P8-NEXT:    mr r29, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __mulkf3
@@ -184,8 +184,8 @@ define void @qpFnmadd(ptr nocapture readonly %a,
 ; CHECK-P8-LABEL: qpFnmadd:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -96(r1)
+; CHECK-P8-NEXT:    std r0, 112(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
@@ -254,12 +254,12 @@ define void @qpFnmadd_02(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    std r0, 96(r1)
 ; CHECK-P8-NEXT:    mr r30, r6
 ; CHECK-P8-NEXT:    mr r29, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __mulkf3
@@ -309,8 +309,8 @@ define void @qpFmsub(ptr nocapture readonly %a,
 ; CHECK-P8-LABEL: qpFmsub:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
+; CHECK-P8-NEXT:    std r0, 96(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
@@ -372,12 +372,12 @@ define void @qpFmsub_02(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    std r0, 80(r1)
 ; CHECK-P8-NEXT:    mr r30, r6
 ; CHECK-P8-NEXT:    mr r29, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __mulkf3
@@ -421,8 +421,8 @@ define void @qpFnmsub(ptr nocapture readonly %a,
 ; CHECK-P8-LABEL: qpFnmsub:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -96(r1)
+; CHECK-P8-NEXT:    std r0, 112(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
@@ -491,12 +491,12 @@ define void @qpFnmsub_02(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
+; CHECK-P8-NEXT:    std r0, 96(r1)
 ; CHECK-P8-NEXT:    mr r30, r6
 ; CHECK-P8-NEXT:    mr r29, r5
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __mulkf3

diff  --git a/llvm/test/CodeGen/PowerPC/f128-passByValue.ll b/llvm/test/CodeGen/PowerPC/f128-passByValue.ll
index 77f16f29a7879..dd0493a12dcfd 100644
--- a/llvm/test/CodeGen/PowerPC/f128-passByValue.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-passByValue.ll
@@ -39,8 +39,8 @@ define fp128 @loadConstant2(fp128 %a, fp128 %b) {
 ; CHECK-P8-LABEL: loadConstant2:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    bl __addkf3
@@ -74,8 +74,8 @@ define signext i32 @fp128Param(fp128 %a) {
 ; CHECK-P8-LABEL: fp128Param:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    bl __fixkfsi
@@ -101,8 +101,8 @@ define fp128 @fp128Return(fp128 %a, fp128 %b) {
 ; CHECK-P8-LABEL: fp128Return:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    bl __addkf3
@@ -131,8 +131,8 @@ define fp128 @fp128Array(ptr nocapture readonly %farray,
 ; CHECK-P8-LABEL: fp128Array:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    sldi r4, r4, 4
@@ -183,8 +183,8 @@ define fp128 @maxVecParam(fp128 %p1, fp128 %p2, fp128 %p3, fp128 %p4, fp128 %p5,
 ; CHECK-P8-LABEL: maxVecParam:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -224(r1)
+; CHECK-P8-NEXT:    std r0, 240(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 224
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset v21, -176
@@ -326,8 +326,8 @@ define fp128 @mixParam_01(fp128 %a, i32 signext %i, fp128 %b) {
 ; CHECK-P8-LABEL: mixParam_01:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
+; CHECK-P8-NEXT:    std r0, 96(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
@@ -372,8 +372,8 @@ define fastcc fp128 @mixParam_01f(fp128 %a, i32 signext %i, fp128 %b) {
 ; CHECK-P8-LABEL: mixParam_01f:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
+; CHECK-P8-NEXT:    std r0, 96(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
@@ -426,8 +426,8 @@ define fp128 @mixParam_02(fp128 %p1, double %p2, ptr nocapture %p3,
 ; CHECK-P8-LABEL: mixParam_02:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
+; CHECK-P8-NEXT:    std r0, 96(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset f31, -8
@@ -499,8 +499,8 @@ define fastcc fp128 @mixParam_02f(fp128 %p1, double %p2, ptr nocapture %p3,
 ; CHECK-P8-LABEL: mixParam_02f:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
+; CHECK-P8-NEXT:    std r0, 96(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset f31, -8
@@ -571,8 +571,8 @@ define void @mixParam_03(fp128 %f1, ptr nocapture %d1, <4 x i32> %vec1,
 ; CHECK-P8-LABEL: mixParam_03:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
+; CHECK-P8-NEXT:    std r0, 96(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
@@ -635,8 +635,8 @@ define fastcc void @mixParam_03f(fp128 %f1, ptr nocapture %d1, <4 x i32> %vec1,
 ; CHECK-P8-LABEL: mixParam_03f:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -80(r1)
+; CHECK-P8-NEXT:    std r0, 96(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
@@ -686,8 +686,8 @@ define signext i32 @noopt_call_crash() #0 {
 ; CHECK-LABEL: noopt_call_crash:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -96(r1)
+; CHECK-NEXT:    std r0, 112(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl in
@@ -703,8 +703,8 @@ define signext i32 @noopt_call_crash() #0 {
 ; CHECK-P8-LABEL: noopt_call_crash:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -96(r1)
+; CHECK-P8-NEXT:    std r0, 112(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    bl in

diff  --git a/llvm/test/CodeGen/PowerPC/f128-rounding.ll b/llvm/test/CodeGen/PowerPC/f128-rounding.ll
index e90106c2374aa..5a925a576f7ab 100644
--- a/llvm/test/CodeGen/PowerPC/f128-rounding.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-rounding.ll
@@ -20,10 +20,10 @@ define void @qp_trunc(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl truncf128
 ; CHECK-P8-NEXT:    nop
@@ -57,10 +57,10 @@ define void @qp_rint(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl rintf128
 ; CHECK-P8-NEXT:    nop
@@ -94,10 +94,10 @@ define void @qp_nearbyint(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl nearbyintf128
 ; CHECK-P8-NEXT:    nop
@@ -131,10 +131,10 @@ define void @qp_round(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl roundf128
 ; CHECK-P8-NEXT:    nop
@@ -168,10 +168,10 @@ define void @qp_floor(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl floorf128
 ; CHECK-P8-NEXT:    nop
@@ -205,10 +205,10 @@ define void @qp_ceil(ptr nocapture readonly %a, ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
+; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    bl ceilf128
 ; CHECK-P8-NEXT:    nop

diff  --git a/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll b/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll
index efe84cebc7fa9..0663f62a2506b 100644
--- a/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll
@@ -24,8 +24,8 @@ define i64 @qpConv2sdw(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2sdw:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -61,9 +61,9 @@ define void @qpConv2sdw_02(ptr nocapture %res) local_unnamed_addr #1 {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
@@ -103,8 +103,8 @@ define i64 @qpConv2sdw_03(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2sdw_03:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
@@ -151,11 +151,11 @@ define void @qpConv2sdw_04(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
@@ -200,10 +200,10 @@ define void @qpConv2sdw_testXForm(ptr nocapture %res, i32 signext %idx) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 80(r1)
 ; CHECK-P8-NEXT:    mr r29, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
@@ -243,8 +243,8 @@ define i64 @qpConv2udw(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2udw:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -280,9 +280,9 @@ define void @qpConv2udw_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
@@ -322,8 +322,8 @@ define i64 @qpConv2udw_03(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2udw_03:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
@@ -370,11 +370,11 @@ define void @qpConv2udw_04(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
@@ -419,10 +419,10 @@ define void @qpConv2udw_testXForm(ptr nocapture %res, i32 signext %idx) {
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
 ; CHECK-P8-NEXT:    mr r30, r4
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 80(r1)
 ; CHECK-P8-NEXT:    mr r29, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
@@ -460,8 +460,8 @@ define signext i32 @qpConv2sw(ptr nocapture readonly %a)  {
 ; CHECK-P8-LABEL: qpConv2sw:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -498,9 +498,9 @@ define void @qpConv2sw_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
@@ -541,8 +541,8 @@ define signext i32 @qpConv2sw_03(ptr nocapture readonly %a)  {
 ; CHECK-P8-LABEL: qpConv2sw_03:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
@@ -590,11 +590,11 @@ define void @qpConv2sw_04(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
@@ -630,8 +630,8 @@ define zeroext i32 @qpConv2uw(ptr nocapture readonly %a)  {
 ; CHECK-P8-LABEL: qpConv2uw:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -667,9 +667,9 @@ define void @qpConv2uw_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
@@ -709,8 +709,8 @@ define zeroext i32 @qpConv2uw_03(ptr nocapture readonly %a)  {
 ; CHECK-P8-LABEL: qpConv2uw_03:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
@@ -757,11 +757,11 @@ define void @qpConv2uw_04(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
@@ -799,8 +799,8 @@ define signext i16 @qpConv2shw(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2shw:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -836,9 +836,9 @@ define void @qpConv2shw_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
@@ -878,8 +878,8 @@ define signext i16 @qpConv2shw_03(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2shw_03:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
@@ -926,11 +926,11 @@ define void @qpConv2shw_04(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
@@ -965,8 +965,8 @@ define zeroext i16 @qpConv2uhw(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2uhw:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -1001,9 +1001,9 @@ define void @qpConv2uhw_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
@@ -1042,8 +1042,8 @@ define zeroext i16 @qpConv2uhw_03(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2uhw_03:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
@@ -1089,11 +1089,11 @@ define void @qpConv2uhw_04(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
@@ -1129,8 +1129,8 @@ define signext i8 @qpConv2sb(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2sb:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -1166,9 +1166,9 @@ define void @qpConv2sb_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
@@ -1208,8 +1208,8 @@ define signext i8 @qpConv2sb_03(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2sb_03:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
@@ -1256,11 +1256,11 @@ define void @qpConv2sb_04(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
@@ -1295,8 +1295,8 @@ define zeroext i8 @qpConv2ub(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2ub:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
@@ -1331,9 +1331,9 @@ define void @qpConv2ub_02(ptr nocapture %res) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r3
 ; CHECK-P8-NEXT:    ld r4, .LC0@toc@l(r4)
 ; CHECK-P8-NEXT:    addi r4, r4, 32
@@ -1372,8 +1372,8 @@ define zeroext i8 @qpConv2ub_03(ptr nocapture readonly %a) {
 ; CHECK-P8-LABEL: qpConv2ub_03:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    addis r4, r2, .LC0@toc@ha
@@ -1419,11 +1419,11 @@ define void @qpConv2ub_04(ptr nocapture readonly %a,
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
+; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
-; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-P8-NEXT:    xxswapd v3, vs1
 ; CHECK-P8-NEXT:    bl __addkf3
@@ -1454,8 +1454,8 @@ define void @qpConvppcf128(fp128 %src, ptr %dst) {
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    mr r30, r5
 ; CHECK-NEXT:    bl __extendkftf2
 ; CHECK-NEXT:    nop
@@ -1474,8 +1474,8 @@ define void @qpConvppcf128(fp128 %src, ptr %dst) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    bl __extendkftf2
 ; CHECK-P8-NEXT:    nop
@@ -1500,8 +1500,8 @@ define void @ppcf128Convqp(ppc_fp128 %src, ptr %dst) {
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    mr r30, r5
 ; CHECK-NEXT:    bl __trunctfkf2
 ; CHECK-NEXT:    nop
@@ -1519,8 +1519,8 @@ define void @ppcf128Convqp(ppc_fp128 %src, ptr %dst) {
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    .cfi_offset r30, -16
 ; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    mr r30, r5
 ; CHECK-P8-NEXT:    bl __trunctfkf2
 ; CHECK-P8-NEXT:    nop

diff  --git a/llvm/test/CodeGen/PowerPC/fast-isel-branch.ll b/llvm/test/CodeGen/PowerPC/fast-isel-branch.ll
index 2fbf2a3ac21fd..826ceead17118 100644
--- a/llvm/test/CodeGen/PowerPC/fast-isel-branch.ll
+++ b/llvm/test/CodeGen/PowerPC/fast-isel-branch.ll
@@ -8,8 +8,8 @@ define signext i32 @bar() #0 {
 ; ELF64-LABEL: bar:
 ; ELF64:       # %bb.0: # %entry
 ; ELF64-NEXT:    mflr 0
-; ELF64-NEXT:    std 0, 16(1)
 ; ELF64-NEXT:    stdu 1, -48(1)
+; ELF64-NEXT:    std 0, 64(1)
 ; ELF64-NEXT:    .cfi_def_cfa_offset 48
 ; ELF64-NEXT:    .cfi_offset lr, 16
 ; ELF64-NEXT:    li 3, 0
@@ -44,8 +44,8 @@ define signext i32 @bar() #0 {
 ; AIX64-LABEL: bar:
 ; AIX64:       # %bb.0: # %entry
 ; AIX64-NEXT:    mflr 0
-; AIX64-NEXT:    std 0, 16(1)
 ; AIX64-NEXT:    stdu 1, -128(1)
+; AIX64-NEXT:    std 0, 144(1)
 ; AIX64-NEXT:    li 3, 0
 ; AIX64-NEXT:    stw 3, 124(1)
 ; AIX64-NEXT:    li 3, 0
@@ -53,7 +53,7 @@ define signext i32 @bar() #0 {
 ; AIX64-NEXT:  L..BB0_1: # %for.cond
 ; AIX64-NEXT:    #
 ; AIX64-NEXT:    lwz 3, 120(1)
-; AIX64-NEXT:    ld 4, L..C0(2)
+; AIX64-NEXT:    ld 4, L..C0(2) # @x
 ; AIX64-NEXT:    lwz 4, 0(4)
 ; AIX64-NEXT:    cmpw 3, 4
 ; AIX64-NEXT:    bge 0, L..BB0_4

diff  --git a/llvm/test/CodeGen/PowerPC/float-load-store-pair.ll b/llvm/test/CodeGen/PowerPC/float-load-store-pair.ll
index 32ad243b6d425..a22a1cbef8e52 100644
--- a/llvm/test/CodeGen/PowerPC/float-load-store-pair.ll
+++ b/llvm/test/CodeGen/PowerPC/float-load-store-pair.ll
@@ -29,9 +29,9 @@ define dso_local signext i32 @test() nounwind {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -192(1)
 ; CHECK-NEXT:    addis 3, 2, a1@toc@ha
+; CHECK-NEXT:    std 0, 208(1)
 ; CHECK-NEXT:    addis 5, 2, a16@toc@ha
 ; CHECK-NEXT:    addis 6, 2, a17@toc@ha
 ; CHECK-NEXT:    addis 4, 2, a15@toc@ha

diff  --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
index d456d2d752c09..d6a6f195f47e7 100644
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -593,8 +593,8 @@ define double @log2_approx(double %x) nounwind {
 ; FMF-LABEL: log2_approx:
 ; FMF:       # %bb.0:
 ; FMF-NEXT:    mflr 0
-; FMF-NEXT:    std 0, 16(1)
 ; FMF-NEXT:    stdu 1, -32(1)
+; FMF-NEXT:    std 0, 48(1)
 ; FMF-NEXT:    bl log2
 ; FMF-NEXT:    nop
 ; FMF-NEXT:    addi 1, 1, 32
@@ -605,8 +605,8 @@ define double @log2_approx(double %x) nounwind {
 ; GLOBAL-LABEL: log2_approx:
 ; GLOBAL:       # %bb.0:
 ; GLOBAL-NEXT:    mflr 0
-; GLOBAL-NEXT:    std 0, 16(1)
 ; GLOBAL-NEXT:    stdu 1, -32(1)
+; GLOBAL-NEXT:    std 0, 48(1)
 ; GLOBAL-NEXT:    bl log2
 ; GLOBAL-NEXT:    nop
 ; GLOBAL-NEXT:    addi 1, 1, 32
@@ -644,3 +644,6 @@ define float @fneg_fsub_nozeros_1(float %x, float %y, float %z) {
 
 attributes #0 = { "denormal-fp-math"="ieee,ieee" }
 attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; FMFDEBUG: {{.*}}
+; GLOBALDEBUG: {{.*}}

diff  --git a/llvm/test/CodeGen/PowerPC/fminnum.ll b/llvm/test/CodeGen/PowerPC/fminnum.ll
index 18db085a48376..d2b9e2b421e31 100644
--- a/llvm/test/CodeGen/PowerPC/fminnum.ll
+++ b/llvm/test/CodeGen/PowerPC/fminnum.ll
@@ -16,8 +16,8 @@ define float @test_fminf(float %x, float %y) {
 ; CHECK-LABEL: test_fminf:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -16(1)
+; CHECK-NEXT:    stw 0, 20(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset lr, 4
 ; CHECK-NEXT:    bl fminf
@@ -33,8 +33,8 @@ define double @test_fmin(double %x, double %y) {
 ; CHECK-LABEL: test_fmin:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -16(1)
+; CHECK-NEXT:    stw 0, 20(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset lr, 4
 ; CHECK-NEXT:    bl fmin
@@ -50,8 +50,8 @@ define ppc_fp128 @test_fminl(ppc_fp128 %x, ppc_fp128 %y) {
 ; CHECK-LABEL: test_fminl:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -112(1)
+; CHECK-NEXT:    stw 0, 116(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    .cfi_offset lr, 4
 ; CHECK-NEXT:    stfd 1, 40(1)
@@ -103,8 +103,8 @@ define float @test_intrinsic_fmin_f32(float %x, float %y) {
 ; CHECK-LABEL: test_intrinsic_fmin_f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -16(1)
+; CHECK-NEXT:    stw 0, 20(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset lr, 4
 ; CHECK-NEXT:    bl fminf
@@ -120,8 +120,8 @@ define double @test_intrinsic_fmin_f64(double %x, double %y) {
 ; CHECK-LABEL: test_intrinsic_fmin_f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -16(1)
+; CHECK-NEXT:    stw 0, 20(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset lr, 4
 ; CHECK-NEXT:    bl fmin
@@ -137,8 +137,8 @@ define ppc_fp128 @test_intrinsic_fmin_f128(ppc_fp128 %x, ppc_fp128 %y) {
 ; CHECK-LABEL: test_intrinsic_fmin_f128:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -112(1)
+; CHECK-NEXT:    stw 0, 116(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-NEXT:    .cfi_offset lr, 4
 ; CHECK-NEXT:    stfd 1, 40(1)
@@ -190,8 +190,8 @@ define <2 x float> @test_intrinsic_fminf_v2f32(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: test_intrinsic_fminf_v2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -32(1)
+; CHECK-NEXT:    stw 0, 36(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 4
 ; CHECK-NEXT:    .cfi_offset f29, -24
@@ -225,8 +225,8 @@ define <4 x float> @test_intrinsic_fmin_v4f32(<4 x float> %x, <4 x float> %y) {
 ; CHECK-LABEL: test_intrinsic_fmin_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -64(1)
+; CHECK-NEXT:    stw 0, 68(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset lr, 4
 ; CHECK-NEXT:    .cfi_offset f25, -56
@@ -286,8 +286,8 @@ define <8 x float> @test_intrinsic_fmin_v8f32(<8 x float> %x, <8 x float> %y) {
 ; CHECK-LABEL: test_intrinsic_fmin_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -128(1)
+; CHECK-NEXT:    stw 0, 132(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 128
 ; CHECK-NEXT:    .cfi_offset lr, 4
 ; CHECK-NEXT:    .cfi_offset f17, -120
@@ -399,8 +399,8 @@ define ppc_fp128 @fminnum_const(ppc_fp128 %0) {
 ; CHECK-LABEL: fminnum_const:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -96(1)
+; CHECK-NEXT:    stw 0, 100(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    .cfi_offset lr, 4
 ; CHECK-NEXT:    stfd 1, 40(1)

diff  --git a/llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll b/llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll
index b46b1409da7d6..8ebf54a3dc489 100644
--- a/llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll
@@ -8,8 +8,8 @@ define float @f_i128_f(float %v) nounwind {
 ; CHECK-LABEL: f_i128_f:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    bl __floattisf

diff  --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
index 6e02d1ed4e146..97f9ae172e769 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
@@ -46,8 +46,8 @@ define i128 @q_to_i128(fp128 %m) #0 {
 ; P8-LABEL: q_to_i128:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __fixkfti
@@ -60,8 +60,8 @@ define i128 @q_to_i128(fp128 %m) #0 {
 ; P9-LABEL: q_to_i128:
 ; P9:       # %bb.0: # %entry
 ; P9-NEXT:    mflr r0
-; P9-NEXT:    std r0, 16(r1)
 ; P9-NEXT:    stdu r1, -32(r1)
+; P9-NEXT:    std r0, 48(r1)
 ; P9-NEXT:    .cfi_def_cfa_offset 32
 ; P9-NEXT:    .cfi_offset lr, 16
 ; P9-NEXT:    bl __fixkfti
@@ -74,8 +74,8 @@ define i128 @q_to_i128(fp128 %m) #0 {
 ; NOVSX-LABEL: q_to_i128:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __fixkfti
@@ -93,8 +93,8 @@ define i128 @q_to_u128(fp128 %m) #0 {
 ; P8-LABEL: q_to_u128:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __fixunskfti
@@ -107,8 +107,8 @@ define i128 @q_to_u128(fp128 %m) #0 {
 ; P9-LABEL: q_to_u128:
 ; P9:       # %bb.0: # %entry
 ; P9-NEXT:    mflr r0
-; P9-NEXT:    std r0, 16(r1)
 ; P9-NEXT:    stdu r1, -32(r1)
+; P9-NEXT:    std r0, 48(r1)
 ; P9-NEXT:    .cfi_def_cfa_offset 32
 ; P9-NEXT:    .cfi_offset lr, 16
 ; P9-NEXT:    bl __fixunskfti
@@ -121,8 +121,8 @@ define i128 @q_to_u128(fp128 %m) #0 {
 ; NOVSX-LABEL: q_to_u128:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __fixunskfti
@@ -140,8 +140,8 @@ define i1 @q_to_s1(fp128 %m) #0 {
 ; P8-LABEL: q_to_s1:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __fixkfsi
@@ -160,8 +160,8 @@ define i1 @q_to_s1(fp128 %m) #0 {
 ; NOVSX-LABEL: q_to_s1:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __fixkfsi
@@ -179,8 +179,8 @@ define i1 @q_to_u1(fp128 %m) #0 {
 ; P8-LABEL: q_to_u1:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __fixkfsi
@@ -199,8 +199,8 @@ define i1 @q_to_u1(fp128 %m) #0 {
 ; NOVSX-LABEL: q_to_u1:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __fixunskfsi
@@ -218,8 +218,8 @@ define i128 @ppcq_to_i128(ppc_fp128 %m) #0 {
 ; P8-LABEL: ppcq_to_i128:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __fixtfti
@@ -232,8 +232,8 @@ define i128 @ppcq_to_i128(ppc_fp128 %m) #0 {
 ; P9-LABEL: ppcq_to_i128:
 ; P9:       # %bb.0: # %entry
 ; P9-NEXT:    mflr r0
-; P9-NEXT:    std r0, 16(r1)
 ; P9-NEXT:    stdu r1, -32(r1)
+; P9-NEXT:    std r0, 48(r1)
 ; P9-NEXT:    .cfi_def_cfa_offset 32
 ; P9-NEXT:    .cfi_offset lr, 16
 ; P9-NEXT:    bl __fixtfti
@@ -246,8 +246,8 @@ define i128 @ppcq_to_i128(ppc_fp128 %m) #0 {
 ; NOVSX-LABEL: ppcq_to_i128:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __fixtfti
@@ -265,8 +265,8 @@ define i128 @ppcq_to_u128(ppc_fp128 %m) #0 {
 ; P8-LABEL: ppcq_to_u128:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __fixtfti
@@ -279,8 +279,8 @@ define i128 @ppcq_to_u128(ppc_fp128 %m) #0 {
 ; P9-LABEL: ppcq_to_u128:
 ; P9:       # %bb.0: # %entry
 ; P9-NEXT:    mflr r0
-; P9-NEXT:    std r0, 16(r1)
 ; P9-NEXT:    stdu r1, -32(r1)
+; P9-NEXT:    std r0, 48(r1)
 ; P9-NEXT:    .cfi_def_cfa_offset 32
 ; P9-NEXT:    .cfi_offset lr, 16
 ; P9-NEXT:    bl __fixtfti
@@ -293,8 +293,8 @@ define i128 @ppcq_to_u128(ppc_fp128 %m) #0 {
 ; NOVSX-LABEL: ppcq_to_u128:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __fixtfti
@@ -312,8 +312,8 @@ define signext i32 @q_to_i32(fp128 %m) #0 {
 ; P8-LABEL: q_to_i32:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __fixkfsi
@@ -334,8 +334,8 @@ define signext i32 @q_to_i32(fp128 %m) #0 {
 ; NOVSX-LABEL: q_to_i32:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __fixkfsi
@@ -354,8 +354,8 @@ define i64 @q_to_i64(fp128 %m) #0 {
 ; P8-LABEL: q_to_i64:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __fixkfdi
@@ -374,8 +374,8 @@ define i64 @q_to_i64(fp128 %m) #0 {
 ; NOVSX-LABEL: q_to_i64:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __fixkfdi
@@ -393,8 +393,8 @@ define i64 @q_to_u64(fp128 %m) #0 {
 ; P8-LABEL: q_to_u64:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __fixunskfdi
@@ -413,8 +413,8 @@ define i64 @q_to_u64(fp128 %m) #0 {
 ; NOVSX-LABEL: q_to_u64:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __fixunskfdi
@@ -432,8 +432,8 @@ define zeroext i32 @q_to_u32(fp128 %m) #0 {
 ; P8-LABEL: q_to_u32:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __fixunskfsi
@@ -452,8 +452,8 @@ define zeroext i32 @q_to_u32(fp128 %m) #0 {
 ; NOVSX-LABEL: q_to_u32:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __fixunskfsi
@@ -513,8 +513,8 @@ define i64 @ppcq_to_i64(ppc_fp128 %m) #0 {
 ; P8-LABEL: ppcq_to_i64:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __fixtfdi
@@ -527,8 +527,8 @@ define i64 @ppcq_to_i64(ppc_fp128 %m) #0 {
 ; P9-LABEL: ppcq_to_i64:
 ; P9:       # %bb.0: # %entry
 ; P9-NEXT:    mflr r0
-; P9-NEXT:    std r0, 16(r1)
 ; P9-NEXT:    stdu r1, -32(r1)
+; P9-NEXT:    std r0, 48(r1)
 ; P9-NEXT:    .cfi_def_cfa_offset 32
 ; P9-NEXT:    .cfi_offset lr, 16
 ; P9-NEXT:    bl __fixtfdi
@@ -541,8 +541,8 @@ define i64 @ppcq_to_i64(ppc_fp128 %m) #0 {
 ; NOVSX-LABEL: ppcq_to_i64:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __fixtfdi
@@ -560,8 +560,8 @@ define i64 @ppcq_to_u64(ppc_fp128 %m) #0 {
 ; P8-LABEL: ppcq_to_u64:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __fixunstfdi
@@ -574,8 +574,8 @@ define i64 @ppcq_to_u64(ppc_fp128 %m) #0 {
 ; P9-LABEL: ppcq_to_u64:
 ; P9:       # %bb.0: # %entry
 ; P9-NEXT:    mflr r0
-; P9-NEXT:    std r0, 16(r1)
 ; P9-NEXT:    stdu r1, -32(r1)
+; P9-NEXT:    std r0, 48(r1)
 ; P9-NEXT:    .cfi_def_cfa_offset 32
 ; P9-NEXT:    .cfi_offset lr, 16
 ; P9-NEXT:    bl __fixunstfdi
@@ -588,8 +588,8 @@ define i64 @ppcq_to_u64(ppc_fp128 %m) #0 {
 ; NOVSX-LABEL: ppcq_to_u64:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __fixunstfdi
@@ -607,8 +607,8 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 {
 ; P8-LABEL: ppcq_to_u32:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -128(r1)
+; P8-NEXT:    std r0, 144(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 128
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    .cfi_offset r30, -16
@@ -653,15 +653,15 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 {
 ; P9-NEXT:    .cfi_offset lr, 16
 ; P9-NEXT:    .cfi_offset r30, -16
 ; P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; P9-NEXT:    std r0, 16(r1)
 ; P9-NEXT:    stdu r1, -48(r1)
 ; P9-NEXT:    addis r3, r2, .LCPI13_0@toc@ha
 ; P9-NEXT:    xxlxor f3, f3, f3
+; P9-NEXT:    std r0, 64(r1)
 ; P9-NEXT:    lfs f0, .LCPI13_0@toc@l(r3)
 ; P9-NEXT:    fcmpo cr1, f2, f3
 ; P9-NEXT:    lis r3, -32768
-; P9-NEXT:    fcmpo cr0, f1, f0
 ; P9-NEXT:    xxlxor f3, f3, f3
+; P9-NEXT:    fcmpo cr0, f1, f0
 ; P9-NEXT:    crand 4*cr5+lt, eq, 4*cr1+lt
 ; P9-NEXT:    crandc 4*cr5+gt, lt, eq
 ; P9-NEXT:    cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
@@ -692,9 +692,9 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 {
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mfocrf r12, 32
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stw r12, 8(r1)
 ; NOVSX-NEXT:    stdu r1, -48(r1)
+; NOVSX-NEXT:    std r0, 64(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 48
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    .cfi_offset cr2, 8
@@ -742,8 +742,8 @@ define fp128 @i1_to_q(i1 signext %m) #0 {
 ; P8-LABEL: i1_to_q:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __floatsikf
@@ -762,8 +762,8 @@ define fp128 @i1_to_q(i1 signext %m) #0 {
 ; NOVSX-LABEL: i1_to_q:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __floatsikf
@@ -781,8 +781,8 @@ define fp128 @u1_to_q(i1 zeroext %m) #0 {
 ; P8-LABEL: u1_to_q:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __floatsikf
@@ -801,8 +801,8 @@ define fp128 @u1_to_q(i1 zeroext %m) #0 {
 ; NOVSX-LABEL: u1_to_q:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __floatunsikf
@@ -878,8 +878,8 @@ define fp128 @i32_to_q(i32 signext %m) #0 {
 ; P8-LABEL: i32_to_q:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __floatsikf
@@ -898,8 +898,8 @@ define fp128 @i32_to_q(i32 signext %m) #0 {
 ; NOVSX-LABEL: i32_to_q:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __floatsikf
@@ -917,8 +917,8 @@ define fp128 @i64_to_q(i64 %m) #0 {
 ; P8-LABEL: i64_to_q:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __floatdikf
@@ -937,8 +937,8 @@ define fp128 @i64_to_q(i64 %m) #0 {
 ; NOVSX-LABEL: i64_to_q:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __floatdikf
@@ -956,8 +956,8 @@ define fp128 @u32_to_q(i32 zeroext %m) #0 {
 ; P8-LABEL: u32_to_q:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __floatunsikf
@@ -976,8 +976,8 @@ define fp128 @u32_to_q(i32 zeroext %m) #0 {
 ; NOVSX-LABEL: u32_to_q:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __floatunsikf
@@ -995,8 +995,8 @@ define fp128 @u64_to_q(i64 %m) #0 {
 ; P8-LABEL: u64_to_q:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl __floatundikf
@@ -1015,8 +1015,8 @@ define fp128 @u64_to_q(i64 %m) #0 {
 ; NOVSX-LABEL: u64_to_q:
 ; NOVSX:       # %bb.0: # %entry
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    .cfi_def_cfa_offset 32
 ; NOVSX-NEXT:    .cfi_offset lr, 16
 ; NOVSX-NEXT:    bl __floatundikf

diff  --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-spe.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-spe.ll
index ab806a19c158e..2a7d8548956a4 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-conv-spe.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-spe.ll
@@ -38,8 +38,8 @@ define i64 @d_to_i64(double %m) #0 {
 ; SPE-LABEL: d_to_i64:
 ; SPE:       # %bb.0: # %entry
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    evmergelo r4, r3, r4
@@ -60,8 +60,8 @@ define i64 @d_to_u64(double %m) #0 {
 ; SPE-LABEL: d_to_u64:
 ; SPE:       # %bb.0: # %entry
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    evmergelo r4, r3, r4
@@ -103,8 +103,8 @@ define i64 @f_to_i64(float %m) #0 {
 ; SPE-LABEL: f_to_i64:
 ; SPE:       # %bb.0: # %entry
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    bl __fixsfdi
@@ -121,8 +121,8 @@ define i64 @f_to_u64(float %m) #0 {
 ; SPE-LABEL: f_to_u64:
 ; SPE:       # %bb.0: # %entry
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    bl __fixunssfdi
@@ -162,8 +162,8 @@ define double @i64_to_d(i64 %m) #0 {
 ; SPE-LABEL: i64_to_d:
 ; SPE:       # %bb.0: # %entry
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    bl __floatdidf
@@ -197,8 +197,8 @@ define double @u64_to_d(i64 %m) #0 {
 ; SPE-LABEL: u64_to_d:
 ; SPE:       # %bb.0: # %entry
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    bl __floatundidf
@@ -229,8 +229,8 @@ define float @i64_to_f(i64 %m) #0 {
 ; SPE-LABEL: i64_to_f:
 ; SPE:       # %bb.0: # %entry
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    bl __floatdisf
@@ -257,8 +257,8 @@ define float @u64_to_f(i64 %m) #0 {
 ; SPE-LABEL: u64_to_f:
 ; SPE:       # %bb.0: # %entry
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    bl __floatundisf

diff  --git a/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
index acb623459d3fc..8faed9b8f844e 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
@@ -21,8 +21,8 @@ define fp128 @fadd_f128(fp128 %f1, fp128 %f2) #0 {
 ; CHECK-P8-LABEL: fadd_f128:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    bl __addkf3
@@ -47,8 +47,8 @@ define fp128 @fsub_f128(fp128 %f1, fp128 %f2) #0 {
 ; CHECK-P8-LABEL: fsub_f128:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    bl __subkf3
@@ -73,8 +73,8 @@ define fp128 @fmul_f128(fp128 %f1, fp128 %f2) #0 {
 ; CHECK-P8-LABEL: fmul_f128:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    bl __mulkf3
@@ -99,8 +99,8 @@ define fp128 @fdiv_f128(fp128 %f1, fp128 %f2) #0 {
 ; CHECK-P8-LABEL: fdiv_f128:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    bl __divkf3
@@ -126,8 +126,8 @@ define fp128 @fmadd_f128(fp128 %f0, fp128 %f1, fp128 %f2) #0 {
 ; CHECK-P8-LABEL: fmadd_f128:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    bl fmaf128
@@ -153,8 +153,8 @@ define fp128 @fmsub_f128(fp128 %f0, fp128 %f1, fp128 %f2) #0 {
 ; CHECK-P8-LABEL: fmsub_f128:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    xxswapd vs0, v4
@@ -189,8 +189,8 @@ define fp128 @fnmadd_f128(fp128 %f0, fp128 %f1, fp128 %f2) #0 {
 ; CHECK-P8-LABEL: fnmadd_f128:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -48(r1)
+; CHECK-P8-NEXT:    std r0, 64(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    bl fmaf128
@@ -225,8 +225,8 @@ define fp128 @fnmsub_f128(fp128 %f0, fp128 %f1, fp128 %f2) #0 {
 ; CHECK-P8-LABEL: fnmsub_f128:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -64(r1)
+; CHECK-P8-NEXT:    std r0, 80(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    xxswapd vs0, v4
@@ -270,8 +270,8 @@ define fp128 @fsqrt_f128(fp128 %f1) #0 {
 ; CHECK-P8-LABEL: fsqrt_f128:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r0, 16(r1)
 ; CHECK-P8-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-NEXT:    std r0, 48(r1)
 ; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-NEXT:    bl sqrtf128

diff  --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll
index 338f2a5efca32..9a44e195a25e3 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll
@@ -1485,8 +1485,8 @@ define i32 @fcmp_olt_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmp_olt_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __ltkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -1506,8 +1506,8 @@ define i32 @fcmp_olt_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_olt_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __ltkf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -1524,8 +1524,8 @@ define i32 @fcmp_ole_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmp_ole_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __lekf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    extsw r3, r3
@@ -1548,8 +1548,8 @@ define i32 @fcmp_ole_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_ole_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __lekf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    extsw r3, r3
@@ -1569,8 +1569,8 @@ define i32 @fcmp_ogt_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmp_ogt_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __gtkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    extsw r3, r3
@@ -1592,8 +1592,8 @@ define i32 @fcmp_ogt_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_ogt_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __gtkf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    extsw r3, r3
@@ -1612,8 +1612,8 @@ define i32 @fcmp_oge_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmp_oge_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __gekf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -1634,8 +1634,8 @@ define i32 @fcmp_oge_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_oge_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __gekf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -1653,8 +1653,8 @@ define i32 @fcmp_oeq_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmp_oeq_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __eqkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    cntlzw r3, r3
@@ -1675,8 +1675,8 @@ define i32 @fcmp_oeq_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_oeq_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __eqkf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    cntlzw r3, r3
@@ -1694,9 +1694,9 @@ define i32 @fcmp_one_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmp_one_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -176(r1)
 ; P8-NEXT:    li r3, 128
+; P8-NEXT:    std r0, 192(r1)
 ; P8-NEXT:    std r30, 160(r1) # 8-byte Folded Spill
 ; P8-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    li r3, 144
@@ -1741,8 +1741,8 @@ define i32 @fcmp_one_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; NOVSX-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; NOVSX-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -80(r1)
+; NOVSX-NEXT:    std r0, 96(r1)
 ; NOVSX-NEXT:    mr r30, r6
 ; NOVSX-NEXT:    mr r29, r5
 ; NOVSX-NEXT:    mr r28, r4
@@ -1779,8 +1779,8 @@ define i32 @fcmp_ult_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmp_ult_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __gekf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -1800,8 +1800,8 @@ define i32 @fcmp_ult_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_ult_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __gekf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -1818,8 +1818,8 @@ define i32 @fcmp_ule_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmp_ule_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __gtkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    extsw r3, r3
@@ -1841,8 +1841,8 @@ define i32 @fcmp_ule_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_ule_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __gtkf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    extsw r3, r3
@@ -1862,8 +1862,8 @@ define i32 @fcmp_ugt_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmp_ugt_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __lekf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    extsw r3, r3
@@ -1885,8 +1885,8 @@ define i32 @fcmp_ugt_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_ugt_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __lekf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    extsw r3, r3
@@ -1905,8 +1905,8 @@ define i32 @fcmp_uge_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmp_uge_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __ltkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -1926,8 +1926,8 @@ define i32 @fcmp_uge_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_uge_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __ltkf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -1945,9 +1945,9 @@ define i32 @fcmp_ueq_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmp_ueq_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -176(r1)
 ; P8-NEXT:    li r3, 128
+; P8-NEXT:    std r0, 192(r1)
 ; P8-NEXT:    std r30, 160(r1) # 8-byte Folded Spill
 ; P8-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    li r3, 144
@@ -1992,8 +1992,8 @@ define i32 @fcmp_ueq_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; NOVSX-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; NOVSX-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -80(r1)
+; NOVSX-NEXT:    std r0, 96(r1)
 ; NOVSX-NEXT:    mr r30, r6
 ; NOVSX-NEXT:    mr r29, r5
 ; NOVSX-NEXT:    mr r28, r4
@@ -2030,8 +2030,8 @@ define i32 @fcmp_une_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmp_une_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __nekf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    cntlzw r3, r3
@@ -2052,8 +2052,8 @@ define i32 @fcmp_une_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmp_une_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __nekf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    cntlzw r3, r3
@@ -2072,8 +2072,8 @@ define i32 @fcmps_olt_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmps_olt_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __ltkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -2093,8 +2093,8 @@ define i32 @fcmps_olt_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_olt_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __ltkf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -2111,8 +2111,8 @@ define i32 @fcmps_ole_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmps_ole_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __lekf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    extsw r3, r3
@@ -2135,8 +2135,8 @@ define i32 @fcmps_ole_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_ole_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __lekf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    extsw r3, r3
@@ -2156,8 +2156,8 @@ define i32 @fcmps_ogt_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmps_ogt_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __gtkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    extsw r3, r3
@@ -2179,8 +2179,8 @@ define i32 @fcmps_ogt_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_ogt_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __gtkf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    extsw r3, r3
@@ -2199,8 +2199,8 @@ define i32 @fcmps_oge_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmps_oge_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __gekf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -2221,8 +2221,8 @@ define i32 @fcmps_oge_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_oge_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __gekf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -2240,8 +2240,8 @@ define i32 @fcmps_oeq_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmps_oeq_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __eqkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    cntlzw r3, r3
@@ -2262,8 +2262,8 @@ define i32 @fcmps_oeq_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_oeq_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __eqkf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    cntlzw r3, r3
@@ -2281,9 +2281,9 @@ define i32 @fcmps_one_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmps_one_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -176(r1)
 ; P8-NEXT:    li r3, 128
+; P8-NEXT:    std r0, 192(r1)
 ; P8-NEXT:    std r30, 160(r1) # 8-byte Folded Spill
 ; P8-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    li r3, 144
@@ -2328,8 +2328,8 @@ define i32 @fcmps_one_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; NOVSX-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; NOVSX-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -80(r1)
+; NOVSX-NEXT:    std r0, 96(r1)
 ; NOVSX-NEXT:    mr r30, r6
 ; NOVSX-NEXT:    mr r29, r5
 ; NOVSX-NEXT:    mr r28, r4
@@ -2366,8 +2366,8 @@ define i32 @fcmps_ult_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmps_ult_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __gekf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -2387,8 +2387,8 @@ define i32 @fcmps_ult_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_ult_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __gekf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -2405,8 +2405,8 @@ define i32 @fcmps_ule_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmps_ule_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __gtkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    extsw r3, r3
@@ -2428,8 +2428,8 @@ define i32 @fcmps_ule_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_ule_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __gtkf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    extsw r3, r3
@@ -2449,8 +2449,8 @@ define i32 @fcmps_ugt_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmps_ugt_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __lekf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    extsw r3, r3
@@ -2472,8 +2472,8 @@ define i32 @fcmps_ugt_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_ugt_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __lekf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    extsw r3, r3
@@ -2492,8 +2492,8 @@ define i32 @fcmps_uge_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmps_uge_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __ltkf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -2513,8 +2513,8 @@ define i32 @fcmps_uge_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_uge_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __ltkf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    rlwinm r3, r3, 1, 31, 31
@@ -2532,9 +2532,9 @@ define i32 @fcmps_ueq_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmps_ueq_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -176(r1)
 ; P8-NEXT:    li r3, 128
+; P8-NEXT:    std r0, 192(r1)
 ; P8-NEXT:    std r30, 160(r1) # 8-byte Folded Spill
 ; P8-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    li r3, 144
@@ -2579,8 +2579,8 @@ define i32 @fcmps_ueq_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; NOVSX-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; NOVSX-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -80(r1)
+; NOVSX-NEXT:    std r0, 96(r1)
 ; NOVSX-NEXT:    mr r30, r6
 ; NOVSX-NEXT:    mr r29, r5
 ; NOVSX-NEXT:    mr r28, r4
@@ -2617,8 +2617,8 @@ define i32 @fcmps_une_f128(fp128 %a, fp128 %b) #0 {
 ; P8-LABEL: fcmps_une_f128:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    bl __nekf2
 ; P8-NEXT:    nop
 ; P8-NEXT:    cntlzw r3, r3
@@ -2639,8 +2639,8 @@ define i32 @fcmps_une_f128(fp128 %a, fp128 %b) #0 {
 ; NOVSX-LABEL: fcmps_une_f128:
 ; NOVSX:       # %bb.0:
 ; NOVSX-NEXT:    mflr r0
-; NOVSX-NEXT:    std r0, 16(r1)
 ; NOVSX-NEXT:    stdu r1, -32(r1)
+; NOVSX-NEXT:    std r0, 48(r1)
 ; NOVSX-NEXT:    bl __nekf2
 ; NOVSX-NEXT:    nop
 ; NOVSX-NEXT:    cntlzw r3, r3

diff  --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
index 0ca0cb9ae94d2..4de7d1a05f50e 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
@@ -171,8 +171,8 @@ define double @nearbyint_f64(double %f1, double %f2) {
 ; P8-LABEL: nearbyint_f64:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 112
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    bl nearbyint
@@ -185,8 +185,8 @@ define double @nearbyint_f64(double %f1, double %f2) {
 ; P9-LABEL: nearbyint_f64:
 ; P9:       # %bb.0:
 ; P9-NEXT:    mflr r0
-; P9-NEXT:    std r0, 16(r1)
 ; P9-NEXT:    stdu r1, -32(r1)
+; P9-NEXT:    std r0, 48(r1)
 ; P9-NEXT:    .cfi_def_cfa_offset 32
 ; P9-NEXT:    .cfi_offset lr, 16
 ; P9-NEXT:    bl nearbyint
@@ -206,8 +206,8 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) {
 ; P8-LABEL: nearbyint_v4f32:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -176(r1)
+; P8-NEXT:    std r0, 192(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 176
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    .cfi_offset v29, -48
@@ -258,8 +258,8 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) {
 ; P9-LABEL: nearbyint_v4f32:
 ; P9:       # %bb.0:
 ; P9-NEXT:    mflr r0
-; P9-NEXT:    std r0, 16(r1)
 ; P9-NEXT:    stdu r1, -80(r1)
+; P9-NEXT:    std r0, 96(r1)
 ; P9-NEXT:    .cfi_def_cfa_offset 80
 ; P9-NEXT:    .cfi_offset lr, 16
 ; P9-NEXT:    .cfi_offset v29, -48
@@ -311,8 +311,8 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) {
 ; P8-LABEL: nearbyint_v2f64:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -160(r1)
+; P8-NEXT:    std r0, 176(r1)
 ; P8-NEXT:    .cfi_def_cfa_offset 160
 ; P8-NEXT:    .cfi_offset lr, 16
 ; P8-NEXT:    .cfi_offset v30, -32
@@ -344,8 +344,8 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) {
 ; P9-LABEL: nearbyint_v2f64:
 ; P9:       # %bb.0:
 ; P9-NEXT:    mflr r0
-; P9-NEXT:    std r0, 16(r1)
 ; P9-NEXT:    stdu r1, -64(r1)
+; P9-NEXT:    std r0, 80(r1)
 ; P9-NEXT:    .cfi_def_cfa_offset 64
 ; P9-NEXT:    .cfi_offset lr, 16
 ; P9-NEXT:    .cfi_offset v30, -32

diff  --git a/llvm/test/CodeGen/PowerPC/fp-strict.ll b/llvm/test/CodeGen/PowerPC/fp-strict.ll
index f4f8aeafc9635..3865f237379b4 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict.ll
@@ -594,8 +594,8 @@ define float @fmadd_f32(float %f0, float %f1, float %f2) #0 {
 ; SPE-LABEL: fmadd_f32:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    bl fmaf
@@ -625,8 +625,8 @@ define double @fmadd_f64(double %f0, double %f1, double %f2) #0 {
 ; SPE-LABEL: fmadd_f64:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    evmergelo r8, r7, r8
@@ -699,8 +699,8 @@ define <4 x float> @fmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
 ; SPE-LABEL: fmadd_v4f32:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -96(r1)
+; SPE-NEXT:    stw r0, 100(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 96
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    .cfi_offset r21, -88
@@ -791,8 +791,8 @@ define <2 x double> @fmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x doub
 ; SPE-LABEL: fmadd_v2f64:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -64(r1)
+; SPE-NEXT:    stw r0, 68(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 64
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    .cfi_offset r26, -48
@@ -868,8 +868,8 @@ define float @fmsub_f32(float %f0, float %f1, float %f2) #0 {
 ; SPE-LABEL: fmsub_f32:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    efsneg r5, r5
@@ -901,8 +901,8 @@ define double @fmsub_f64(double %f0, double %f1, double %f2) #0 {
 ; SPE-LABEL: fmsub_f64:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    evmergelo r6, r5, r6
@@ -980,8 +980,8 @@ define <4 x float> @fmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
 ; SPE-LABEL: fmsub_v4f32:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -96(r1)
+; SPE-NEXT:    stw r0, 100(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 96
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    .cfi_offset r21, -88
@@ -1077,8 +1077,8 @@ define <2 x double> @fmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x doub
 ; SPE-LABEL: fmsub_v2f64:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -64(r1)
+; SPE-NEXT:    stw r0, 68(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 64
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    .cfi_offset r26, -48
@@ -1157,8 +1157,8 @@ define float @fnmadd_f32(float %f0, float %f1, float %f2) #0 {
 ; SPE-LABEL: fnmadd_f32:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    bl fmaf
@@ -1190,8 +1190,8 @@ define double @fnmadd_f64(double %f0, double %f1, double %f2) #0 {
 ; SPE-LABEL: fnmadd_f64:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    evmergelo r8, r7, r8
@@ -1269,8 +1269,8 @@ define <4 x float> @fnmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
 ; SPE-LABEL: fnmadd_v4f32:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -96(r1)
+; SPE-NEXT:    stw r0, 100(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 96
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    .cfi_offset r21, -88
@@ -1363,8 +1363,8 @@ define <2 x double> @fnmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x dou
 ; SPE-LABEL: fnmadd_v2f64:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -64(r1)
+; SPE-NEXT:    stw r0, 68(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 64
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    .cfi_offset r26, -48
@@ -1443,8 +1443,8 @@ define float @fnmsub_f32(float %f0, float %f1, float %f2) #0 {
 ; SPE-LABEL: fnmsub_f32:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    efsneg r5, r5
@@ -1478,8 +1478,8 @@ define double @fnmsub_f64(double %f0, double %f1, double %f2) #0 {
 ; SPE-LABEL: fnmsub_f64:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    evmergelo r6, r5, r6
@@ -1560,8 +1560,8 @@ define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
 ; SPE-LABEL: fnmsub_v4f32:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -96(r1)
+; SPE-NEXT:    stw r0, 100(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 96
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    .cfi_offset r21, -88
@@ -1659,8 +1659,8 @@ define <2 x double> @fnmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x dou
 ; SPE-LABEL: fnmsub_v2f64:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -64(r1)
+; SPE-NEXT:    stw r0, 68(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 64
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    .cfi_offset r26, -48
@@ -1741,8 +1741,8 @@ define float @fsqrt_f32(float %f1) #0 {
 ; SPE-LABEL: fsqrt_f32:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    bl sqrtf
@@ -1771,8 +1771,8 @@ define double @fsqrt_f64(double %f1) #0 {
 ; SPE-LABEL: fsqrt_f64:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    stw r0, 20(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 16
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    evmergelo r4, r3, r4
@@ -1824,8 +1824,8 @@ define <4 x float> @fsqrt_v4f32(<4 x float> %vf1) #0 {
 ; SPE-LABEL: fsqrt_v4f32:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -48(r1)
+; SPE-NEXT:    stw r0, 52(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 48
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    .cfi_offset r27, -40
@@ -1883,8 +1883,8 @@ define <2 x double> @fsqrt_v2f64(<2 x double> %vf1) #0 {
 ; SPE-LABEL: fsqrt_v2f64:
 ; SPE:       # %bb.0:
 ; SPE-NEXT:    mflr r0
-; SPE-NEXT:    stw r0, 4(r1)
 ; SPE-NEXT:    stwu r1, -48(r1)
+; SPE-NEXT:    stw r0, 52(r1)
 ; SPE-NEXT:    .cfi_def_cfa_offset 48
 ; SPE-NEXT:    .cfi_offset lr, 4
 ; SPE-NEXT:    .cfi_offset r28, -32

diff  --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
index 7eaf3e4cea392..27021b649f873 100644
--- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
+++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
@@ -92,8 +92,8 @@ define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind  {
 ; PPC64-P8-LE-LABEL: test_copysign:
 ; PPC64-P8-LE:       # %bb.0: # %entry
 ; PPC64-P8-LE-NEXT:    mflr 0
-; PPC64-P8-LE-NEXT:    std 0, 16(1)
 ; PPC64-P8-LE-NEXT:    stdu 1, -32(1)
+; PPC64-P8-LE-NEXT:    std 0, 48(1)
 ; PPC64-P8-LE-NEXT:    bl copysignl
 ; PPC64-P8-LE-NEXT:    nop
 ; PPC64-P8-LE-NEXT:    mffprd 3, 1
@@ -106,8 +106,8 @@ define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind  {
 ; PPC64-LE-LABEL: test_copysign:
 ; PPC64-LE:       # %bb.0: # %entry
 ; PPC64-LE-NEXT:    mflr 0
-; PPC64-LE-NEXT:    std 0, 16(1)
 ; PPC64-LE-NEXT:    stdu 1, -48(1)
+; PPC64-LE-NEXT:    std 0, 64(1)
 ; PPC64-LE-NEXT:    bl copysignl
 ; PPC64-LE-NEXT:    nop
 ; PPC64-LE-NEXT:    stfd 1, 32(1)
@@ -122,8 +122,8 @@ define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind  {
 ; PPC64-P8-BE-LABEL: test_copysign:
 ; PPC64-P8-BE:       # %bb.0: # %entry
 ; PPC64-P8-BE-NEXT:    mflr 0
-; PPC64-P8-BE-NEXT:    std 0, 16(1)
 ; PPC64-P8-BE-NEXT:    stdu 1, -112(1)
+; PPC64-P8-BE-NEXT:    std 0, 128(1)
 ; PPC64-P8-BE-NEXT:    bl copysignl
 ; PPC64-P8-BE-NEXT:    nop
 ; PPC64-P8-BE-NEXT:    mffprd 3, 1
@@ -136,8 +136,8 @@ define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind  {
 ; PPC64-BE-LABEL: test_copysign:
 ; PPC64-BE:       # %bb.0: # %entry
 ; PPC64-BE-NEXT:    mflr 0
-; PPC64-BE-NEXT:    std 0, 16(1)
 ; PPC64-BE-NEXT:    stdu 1, -128(1)
+; PPC64-BE-NEXT:    std 0, 144(1)
 ; PPC64-BE-NEXT:    bl copysignl
 ; PPC64-BE-NEXT:    nop
 ; PPC64-BE-NEXT:    stfd 1, 112(1)
@@ -152,8 +152,8 @@ define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind  {
 ; PPC32-LABEL: test_copysign:
 ; PPC32:       # %bb.0: # %entry
 ; PPC32-NEXT:    mflr 0
-; PPC32-NEXT:    stw 0, 4(1)
 ; PPC32-NEXT:    stwu 1, -96(1)
+; PPC32-NEXT:    stw 0, 100(1)
 ; PPC32-NEXT:    stfd 1, 40(1)
 ; PPC32-NEXT:    lwz 3, 44(1)
 ; PPC32-NEXT:    stfd 2, 32(1)

diff  --git a/llvm/test/CodeGen/PowerPC/frem.ll b/llvm/test/CodeGen/PowerPC/frem.ll
index 4df4f8aff56a9..8cb68e60f7f9b 100644
--- a/llvm/test/CodeGen/PowerPC/frem.ll
+++ b/llvm/test/CodeGen/PowerPC/frem.ll
@@ -5,8 +5,8 @@ define float @frem32(float %a, float %b) {
 ; CHECK-LABEL: frem32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl fmodf
@@ -24,8 +24,8 @@ define double @frem64(double %a, double %b) {
 ; CHECK-LABEL: frem64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl fmod
@@ -43,8 +43,8 @@ define <4 x float> @frem4x32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: frem4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -96(1)
+; CHECK-NEXT:    std 0, 112(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset v28, -64
@@ -105,8 +105,8 @@ define <2 x double> @frem2x64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: frem2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -80(1)
+; CHECK-NEXT:    std 0, 96(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset v29, -48

diff  --git a/llvm/test/CodeGen/PowerPC/funnel-shift.ll b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
index 10e2fc0326f95..128165aef4eaf 100644
--- a/llvm/test/CodeGen/PowerPC/funnel-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
@@ -234,8 +234,8 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
 ; CHECK32_32-LABEL: fshl_i37:
 ; CHECK32_32:       # %bb.0:
 ; CHECK32_32-NEXT:    mflr 0
-; CHECK32_32-NEXT:    stw 0, 4(1)
 ; CHECK32_32-NEXT:    stwu 1, -32(1)
+; CHECK32_32-NEXT:    stw 0, 36(1)
 ; CHECK32_32-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK32_32-NEXT:    .cfi_offset lr, 4
 ; CHECK32_32-NEXT:    .cfi_offset r27, -20
@@ -289,8 +289,8 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
 ; CHECK32_64-LABEL: fshl_i37:
 ; CHECK32_64:       # %bb.0:
 ; CHECK32_64-NEXT:    mflr 0
-; CHECK32_64-NEXT:    stw 0, 4(1)
 ; CHECK32_64-NEXT:    stwu 1, -32(1)
+; CHECK32_64-NEXT:    stw 0, 36(1)
 ; CHECK32_64-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK32_64-NEXT:    .cfi_offset lr, 4
 ; CHECK32_64-NEXT:    .cfi_offset r27, -20
@@ -534,8 +534,8 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
 ; CHECK32_32-LABEL: fshr_i37:
 ; CHECK32_32:       # %bb.0:
 ; CHECK32_32-NEXT:    mflr 0
-; CHECK32_32-NEXT:    stw 0, 4(1)
 ; CHECK32_32-NEXT:    stwu 1, -32(1)
+; CHECK32_32-NEXT:    stw 0, 36(1)
 ; CHECK32_32-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK32_32-NEXT:    .cfi_offset lr, 4
 ; CHECK32_32-NEXT:    .cfi_offset r27, -20
@@ -590,8 +590,8 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
 ; CHECK32_64-LABEL: fshr_i37:
 ; CHECK32_64:       # %bb.0:
 ; CHECK32_64-NEXT:    mflr 0
-; CHECK32_64-NEXT:    stw 0, 4(1)
 ; CHECK32_64-NEXT:    stwu 1, -32(1)
+; CHECK32_64-NEXT:    stw 0, 36(1)
 ; CHECK32_64-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK32_64-NEXT:    .cfi_offset lr, 4
 ; CHECK32_64-NEXT:    .cfi_offset r27, -20

diff  --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
index 99188066de30d..63f9267a7cab7 100644
--- a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
+++ b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
@@ -14,8 +14,8 @@ define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr #0
 ; P8-LABEL: loadd:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
 ; P8-NEXT:    lhz r3, 2(r3)
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
@@ -34,8 +34,8 @@ define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr #0
 ; SOFT-LABEL: loadd:
 ; SOFT:       # %bb.0: # %entry
 ; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    std r0, 48(r1)
 ; SOFT-NEXT:    lhz r3, 2(r3)
 ; SOFT-NEXT:    bl __gnu_h2f_ieee
 ; SOFT-NEXT:    nop
@@ -58,8 +58,8 @@ define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr #0 {
 ; P8-LABEL: loadf:
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
 ; P8-NEXT:    lhz r3, 2(r3)
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
@@ -78,8 +78,8 @@ define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr #0 {
 ; SOFT-LABEL: loadf:
 ; SOFT:       # %bb.0: # %entry
 ; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    std r0, 48(r1)
 ; SOFT-NEXT:    lhz r3, 2(r3)
 ; SOFT-NEXT:    bl __gnu_h2f_ieee
 ; SOFT-NEXT:    nop
@@ -101,8 +101,8 @@ define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr #0
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
 ; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -48(r1)
+; P8-NEXT:    std r0, 64(r1)
 ; P8-NEXT:    mr r30, r3
 ; P8-NEXT:    bl __truncdfhf2
 ; P8-NEXT:    nop
@@ -123,10 +123,10 @@ define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr #0
 ; SOFT:       # %bb.0: # %entry
 ; SOFT-NEXT:    mflr r0
 ; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -48(r1)
 ; SOFT-NEXT:    mr r30, r3
 ; SOFT-NEXT:    mr r3, r4
+; SOFT-NEXT:    std r0, 64(r1)
 ; SOFT-NEXT:    bl __truncdfhf2
 ; SOFT-NEXT:    nop
 ; SOFT-NEXT:    clrldi r3, r3, 48
@@ -153,8 +153,8 @@ define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr #0
 ; P8:       # %bb.0: # %entry
 ; P8-NEXT:    mflr r0
 ; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -48(r1)
+; P8-NEXT:    std r0, 64(r1)
 ; P8-NEXT:    mr r30, r3
 ; P8-NEXT:    bl __gnu_f2h_ieee
 ; P8-NEXT:    nop
@@ -175,10 +175,10 @@ define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr #0
 ; SOFT:       # %bb.0: # %entry
 ; SOFT-NEXT:    mflr r0
 ; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -48(r1)
 ; SOFT-NEXT:    mr r30, r3
 ; SOFT-NEXT:    clrldi r3, r4, 32
+; SOFT-NEXT:    std r0, 64(r1)
 ; SOFT-NEXT:    bl __gnu_f2h_ieee
 ; SOFT-NEXT:    nop
 ; SOFT-NEXT:    clrldi r3, r3, 48
@@ -216,10 +216,10 @@ define void @test_load_store(ptr %in, ptr %out) #0 {
 ; SOFT:       # %bb.0:
 ; SOFT-NEXT:    mflr r0
 ; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -48(r1)
-; SOFT-NEXT:    lhz r3, 0(r3)
+; SOFT-NEXT:    std r0, 64(r1)
 ; SOFT-NEXT:    mr r30, r4
+; SOFT-NEXT:    lhz r3, 0(r3)
 ; SOFT-NEXT:    bl __gnu_h2f_ieee
 ; SOFT-NEXT:    nop
 ; SOFT-NEXT:    bl __gnu_f2h_ieee
@@ -276,8 +276,8 @@ define float @test_extend32(ptr %addr) #0 {
 ; P8-LABEL: test_extend32:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
 ; P8-NEXT:    lhz r3, 0(r3)
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
@@ -295,8 +295,8 @@ define float @test_extend32(ptr %addr) #0 {
 ; SOFT-LABEL: test_extend32:
 ; SOFT:       # %bb.0:
 ; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    std r0, 48(r1)
 ; SOFT-NEXT:    lhz r3, 0(r3)
 ; SOFT-NEXT:    bl __gnu_h2f_ieee
 ; SOFT-NEXT:    nop
@@ -312,8 +312,8 @@ define double @test_extend64(ptr %addr) #0 {
 ; P8-LABEL: test_extend64:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
 ; P8-NEXT:    lhz r3, 0(r3)
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
@@ -331,8 +331,8 @@ define double @test_extend64(ptr %addr) #0 {
 ; SOFT-LABEL: test_extend64:
 ; SOFT:       # %bb.0:
 ; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    std r0, 48(r1)
 ; SOFT-NEXT:    lhz r3, 0(r3)
 ; SOFT-NEXT:    bl __gnu_h2f_ieee
 ; SOFT-NEXT:    nop
@@ -351,8 +351,8 @@ define void @test_trunc32(float %in, ptr %addr) #0 {
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
 ; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -48(r1)
+; P8-NEXT:    std r0, 64(r1)
 ; P8-NEXT:    mr r30, r4
 ; P8-NEXT:    bl __gnu_f2h_ieee
 ; P8-NEXT:    nop
@@ -373,9 +373,9 @@ define void @test_trunc32(float %in, ptr %addr) #0 {
 ; SOFT:       # %bb.0:
 ; SOFT-NEXT:    mflr r0
 ; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -48(r1)
 ; SOFT-NEXT:    clrldi r3, r3, 32
+; SOFT-NEXT:    std r0, 64(r1)
 ; SOFT-NEXT:    mr r30, r4
 ; SOFT-NEXT:    bl __gnu_f2h_ieee
 ; SOFT-NEXT:    nop
@@ -399,8 +399,8 @@ define void @test_trunc64(double %in, ptr %addr) #0 {
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
 ; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -48(r1)
+; P8-NEXT:    std r0, 64(r1)
 ; P8-NEXT:    mr r30, r4
 ; P8-NEXT:    bl __truncdfhf2
 ; P8-NEXT:    nop
@@ -421,8 +421,8 @@ define void @test_trunc64(double %in, ptr %addr) #0 {
 ; SOFT:       # %bb.0:
 ; SOFT-NEXT:    mflr r0
 ; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -48(r1)
+; SOFT-NEXT:    std r0, 64(r1)
 ; SOFT-NEXT:    mr r30, r4
 ; SOFT-NEXT:    bl __truncdfhf2
 ; SOFT-NEXT:    nop
@@ -445,8 +445,8 @@ define i64 @test_fptosi_i64(ptr %p) #0 {
 ; P8-LABEL: test_fptosi_i64:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
 ; P8-NEXT:    lhz r3, 0(r3)
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
@@ -469,8 +469,8 @@ define i64 @test_fptosi_i64(ptr %p) #0 {
 ; SOFT-LABEL: test_fptosi_i64:
 ; SOFT:       # %bb.0:
 ; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    std r0, 48(r1)
 ; SOFT-NEXT:    lhz r3, 0(r3)
 ; SOFT-NEXT:    bl __gnu_h2f_ieee
 ; SOFT-NEXT:    nop
@@ -489,9 +489,9 @@ define void @test_sitofp_i64(i64 %a, ptr %p) #0 {
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
 ; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -48(r1)
 ; P8-NEXT:    mtfprd f0, r3
+; P8-NEXT:    std r0, 64(r1)
 ; P8-NEXT:    mr r30, r4
 ; P8-NEXT:    xscvsxdsp f1, f0
 ; P8-NEXT:    bl __gnu_f2h_ieee
@@ -516,8 +516,8 @@ define void @test_sitofp_i64(i64 %a, ptr %p) #0 {
 ; SOFT:       # %bb.0:
 ; SOFT-NEXT:    mflr r0
 ; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -48(r1)
+; SOFT-NEXT:    std r0, 64(r1)
 ; SOFT-NEXT:    mr r30, r4
 ; SOFT-NEXT:    bl __floatdisf
 ; SOFT-NEXT:    nop
@@ -543,8 +543,8 @@ define i64 @test_fptoui_i64(ptr %p) #0 {
 ; P8-LABEL: test_fptoui_i64:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
 ; P8-NEXT:    lhz r3, 0(r3)
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
@@ -567,8 +567,8 @@ define i64 @test_fptoui_i64(ptr %p) #0 {
 ; SOFT-LABEL: test_fptoui_i64:
 ; SOFT:       # %bb.0:
 ; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    std r0, 48(r1)
 ; SOFT-NEXT:    lhz r3, 0(r3)
 ; SOFT-NEXT:    bl __gnu_h2f_ieee
 ; SOFT-NEXT:    nop
@@ -587,9 +587,9 @@ define void @test_uitofp_i64(i64 %a, ptr %p) #0 {
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
 ; P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -48(r1)
 ; P8-NEXT:    mtfprd f0, r3
+; P8-NEXT:    std r0, 64(r1)
 ; P8-NEXT:    mr r30, r4
 ; P8-NEXT:    xscvuxdsp f1, f0
 ; P8-NEXT:    bl __gnu_f2h_ieee
@@ -614,8 +614,8 @@ define void @test_uitofp_i64(i64 %a, ptr %p) #0 {
 ; SOFT:       # %bb.0:
 ; SOFT-NEXT:    mflr r0
 ; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -48(r1)
+; SOFT-NEXT:    std r0, 64(r1)
 ; SOFT-NEXT:    mr r30, r4
 ; SOFT-NEXT:    bl __floatundisf
 ; SOFT-NEXT:    nop
@@ -640,8 +640,8 @@ define <4 x float> @test_extend32_vec4(ptr %p) #0 {
 ; P8-LABEL: test_extend32_vec4:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    li r4, 48
 ; P8-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
 ; P8-NEXT:    mr r30, r3
@@ -711,8 +711,8 @@ define <4 x float> @test_extend32_vec4(ptr %p) #0 {
 ; SOFT-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -80(r1)
+; SOFT-NEXT:    std r0, 96(r1)
 ; SOFT-NEXT:    mr r30, r3
 ; SOFT-NEXT:    lhz r3, 0(r3)
 ; SOFT-NEXT:    bl __gnu_h2f_ieee
@@ -749,8 +749,8 @@ define <4 x double> @test_extend64_vec4(ptr %p) #0 {
 ; P8-LABEL: test_extend64_vec4:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    li r4, 48
 ; P8-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
 ; P8-NEXT:    mr r30, r3
@@ -814,8 +814,8 @@ define <4 x double> @test_extend64_vec4(ptr %p) #0 {
 ; SOFT-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -80(r1)
+; SOFT-NEXT:    std r0, 96(r1)
 ; SOFT-NEXT:    mr r30, r3
 ; SOFT-NEXT:    lhz r3, 0(r3)
 ; SOFT-NEXT:    bl __gnu_h2f_ieee
@@ -860,17 +860,17 @@ define void @test_trunc32_vec4(<4 x float> %a, ptr %p) #0 {
 ; P8-LABEL: test_trunc32_vec4:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -112(r1)
 ; P8-NEXT:    xxsldwi vs0, vs34, vs34, 3
 ; P8-NEXT:    li r3, 48
+; P8-NEXT:    std r0, 128(r1)
 ; P8-NEXT:    std r27, 72(r1) # 8-byte Folded Spill
 ; P8-NEXT:    std r28, 80(r1) # 8-byte Folded Spill
 ; P8-NEXT:    std r29, 88(r1) # 8-byte Folded Spill
-; P8-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
-; P8-NEXT:    mr r30, r5
 ; P8-NEXT:    xscvspdpn f1, vs0
+; P8-NEXT:    std r30, 96(r1) # 8-byte Folded Spill
 ; P8-NEXT:    stxvd2x vs63, r1, r3 # 16-byte Folded Spill
+; P8-NEXT:    mr r30, r5
 ; P8-NEXT:    vmr v31, v2
 ; P8-NEXT:    bl __gnu_f2h_ieee
 ; P8-NEXT:    nop
@@ -934,10 +934,10 @@ define void @test_trunc32_vec4(<4 x float> %a, ptr %p) #0 {
 ; SOFT-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -80(r1)
 ; SOFT-NEXT:    mr r27, r3
 ; SOFT-NEXT:    clrldi r3, r6, 32
+; SOFT-NEXT:    std r0, 96(r1)
 ; SOFT-NEXT:    mr r30, r7
 ; SOFT-NEXT:    mr r29, r5
 ; SOFT-NEXT:    mr r28, r4
@@ -1002,18 +1002,18 @@ define void @test_trunc64_vec4(<4 x double> %a, ptr %p) #0 {
 ; P8-LABEL: test_trunc64_vec4:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -128(r1)
 ; P8-NEXT:    li r3, 48
+; P8-NEXT:    std r0, 144(r1)
 ; P8-NEXT:    xxswapd vs1, vs34
 ; P8-NEXT:    std r27, 88(r1) # 8-byte Folded Spill
 ; P8-NEXT:    std r28, 96(r1) # 8-byte Folded Spill
 ; P8-NEXT:    std r29, 104(r1) # 8-byte Folded Spill
-; P8-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
-; P8-NEXT:    mr r30, r7
 ; P8-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; P8-NEXT:    stxvd2x vs62, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    li r3, 64
+; P8-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
+; P8-NEXT:    mr r30, r7
 ; P8-NEXT:    vmr v30, v2
 ; P8-NEXT:    stxvd2x vs63, r1, r3 # 16-byte Folded Spill
 ; P8-NEXT:    vmr v31, v3
@@ -1075,10 +1075,10 @@ define void @test_trunc64_vec4(<4 x double> %a, ptr %p) #0 {
 ; SOFT-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -80(r1)
 ; SOFT-NEXT:    mr r27, r3
 ; SOFT-NEXT:    mr r3, r6
+; SOFT-NEXT:    std r0, 96(r1)
 ; SOFT-NEXT:    mr r30, r7
 ; SOFT-NEXT:    mr r29, r5
 ; SOFT-NEXT:    mr r28, r4
@@ -1145,8 +1145,8 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 {
 ; P8-NEXT:    mflr r0
 ; P8-NEXT:    std r30, -24(r1) # 8-byte Folded Spill
 ; P8-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -64(r1)
+; P8-NEXT:    std r0, 80(r1)
 ; P8-NEXT:    mr r30, r3
 ; P8-NEXT:    lhz r3, 0(r4)
 ; P8-NEXT:    bl __gnu_h2f_ieee
@@ -1187,8 +1187,8 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 {
 ; SOFT-NEXT:    mflr r0
 ; SOFT-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; SOFT-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -64(r1)
+; SOFT-NEXT:    std r0, 80(r1)
 ; SOFT-NEXT:    mr r30, r3
 ; SOFT-NEXT:    lhz r3, 0(r4)
 ; SOFT-NEXT:    bl __gnu_h2f_ieee
@@ -1223,8 +1223,8 @@ define half @PR40273(half) #0 {
 ; P8-LABEL: PR40273:
 ; P8:       # %bb.0:
 ; P8-NEXT:    mflr r0
-; P8-NEXT:    std r0, 16(r1)
 ; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
 ; P8-NEXT:    bl __gnu_f2h_ieee
 ; P8-NEXT:    nop
 ; P8-NEXT:    clrldi r3, r3, 48
@@ -1261,9 +1261,9 @@ define half @PR40273(half) #0 {
 ; SOFT-LABEL: PR40273:
 ; SOFT:       # %bb.0:
 ; SOFT-NEXT:    mflr r0
-; SOFT-NEXT:    std r0, 16(r1)
 ; SOFT-NEXT:    stdu r1, -32(r1)
 ; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    std r0, 48(r1)
 ; SOFT-NEXT:    bl __gnu_h2f_ieee
 ; SOFT-NEXT:    nop
 ; SOFT-NEXT:    li r4, 0

diff  --git a/llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll b/llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
index 76a32e0557b20..b98a51f4f875b 100644
--- a/llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
+++ b/llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
@@ -73,9 +73,9 @@ entry:
 
 ; CHECK-LABEL: @main
 
-; CHECK-DAG: mr [[REG:[0-9]+]], 3
-; CHECK-DAG: li 0, 1076
-; CHECK:     stw [[REG]],
+; CHECK: mr [[REG:[0-9]+]], 3
+; CHECK: std 0,
+; CHECK: stw [[REG]],
 
 ; CHECK:     #APP
 ; CHECK:     sc

diff  --git a/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll b/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll
index 381e295d40616..3d48e0f7ef923 100644
--- a/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll
+++ b/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll
@@ -7,9 +7,9 @@ define dso_local zeroext i32 @test(i32 signext %l) nounwind {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
 ; CHECK-NEXT:    addi r3, r3, -1
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    cmplwi r3, 5
 ; CHECK-NEXT:    bgt cr0, .LBB0_3
 ; CHECK-NEXT:  # %bb.1: # %entry

diff  --git a/llvm/test/CodeGen/PowerPC/larger-than-red-zone.ll b/llvm/test/CodeGen/PowerPC/larger-than-red-zone.ll
index 2649671ea9a5c..87a96c368d988 100644
--- a/llvm/test/CodeGen/PowerPC/larger-than-red-zone.ll
+++ b/llvm/test/CodeGen/PowerPC/larger-than-red-zone.ll
@@ -6,8 +6,8 @@ define dso_local signext i32 @caller(i32 signext %a, i32 signext %b) local_unnam
 ; CHECK-LABEL: caller:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -320(r1)
+; CHECK-NEXT:    std r0, 336(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 320
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset r14, -288

diff  --git a/llvm/test/CodeGen/PowerPC/lower-intrinsics-afn-mass_notail.ll b/llvm/test/CodeGen/PowerPC/lower-intrinsics-afn-mass_notail.ll
index 061989b952ca3..e7d5d6c3847f8 100644
--- a/llvm/test/CodeGen/PowerPC/lower-intrinsics-afn-mass_notail.ll
+++ b/llvm/test/CodeGen/PowerPC/lower-intrinsics-afn-mass_notail.ll
@@ -10,9 +10,9 @@ define void @cos_f64(ptr %arg) {
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    .cfi_offset f31, -8
 ; CHECK-LNX-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -48(1)
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-LNX-NEXT:    std 0, 64(1)
 ; CHECK-LNX-NEXT:    xssqrtdp 31, 0
 ; CHECK-LNX-NEXT:    lfs 1, .LCPI0_0@toc@l(3)
 ; CHECK-LNX-NEXT:    bl __xl_cos
@@ -27,8 +27,8 @@ define void @cos_f64(ptr %arg) {
 ; CHECK-AIX-LABEL: cos_f64:
 ; CHECK-AIX:       # %bb.0: # %bb
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; CHECK-AIX-NEXT:    bl .sqrt[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -66,9 +66,9 @@ define void @log_f64(ptr %arg) {
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    .cfi_offset f31, -8
 ; CHECK-LNX-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -48(1)
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-LNX-NEXT:    std 0, 64(1)
 ; CHECK-LNX-NEXT:    xssqrtdp 31, 0
 ; CHECK-LNX-NEXT:    lfs 1, .LCPI1_0@toc@l(3)
 ; CHECK-LNX-NEXT:    bl __xl_log
@@ -83,8 +83,8 @@ define void @log_f64(ptr %arg) {
 ; CHECK-AIX-LABEL: log_f64:
 ; CHECK-AIX:       # %bb.0: # %bb
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; CHECK-AIX-NEXT:    bl .sqrt[PR]
 ; CHECK-AIX-NEXT:    nop

diff  --git a/llvm/test/CodeGen/PowerPC/lower-intrinsics-fast-mass_notail.ll b/llvm/test/CodeGen/PowerPC/lower-intrinsics-fast-mass_notail.ll
index 72326ff6b2bc0..3fe1b7086c697 100644
--- a/llvm/test/CodeGen/PowerPC/lower-intrinsics-fast-mass_notail.ll
+++ b/llvm/test/CodeGen/PowerPC/lower-intrinsics-fast-mass_notail.ll
@@ -6,8 +6,8 @@ define void @cos_f64(ptr %arg) {
 ; CHECK-LNX-LABEL: cos_f64:
 ; CHECK-LNX:       # %bb.0: # %bb
 ; CHECK-LNX-NEXT:    mflr 0
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -32(1)
+; CHECK-LNX-NEXT:    std 0, 48(1)
 ; CHECK-LNX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
@@ -25,8 +25,8 @@ define void @cos_f64(ptr %arg) {
 ; CHECK-AIX-LABEL: cos_f64:
 ; CHECK-AIX:       # %bb.0: # %bb
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; CHECK-AIX-NEXT:    bl .sqrt[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -60,8 +60,8 @@ define void @log_f64(ptr %arg) {
 ; CHECK-LNX-LABEL: log_f64:
 ; CHECK-LNX:       # %bb.0: # %bb
 ; CHECK-LNX-NEXT:    mflr 0
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -32(1)
+; CHECK-LNX-NEXT:    std 0, 48(1)
 ; CHECK-LNX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
@@ -79,8 +79,8 @@ define void @log_f64(ptr %arg) {
 ; CHECK-AIX-LABEL: log_f64:
 ; CHECK-AIX:       # %bb.0: # %bb
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; CHECK-AIX-NEXT:    bl .sqrt[PR]
 ; CHECK-AIX-NEXT:    nop

diff  --git a/llvm/test/CodeGen/PowerPC/machine-pre.ll b/llvm/test/CodeGen/PowerPC/machine-pre.ll
index facf56222b02a..1c8e029c5a31e 100644
--- a/llvm/test/CodeGen/PowerPC/machine-pre.ll
+++ b/llvm/test/CodeGen/PowerPC/machine-pre.ll
@@ -62,11 +62,11 @@ define dso_local signext i32 @foo(i32 signext %x, i32 signext %y) nounwind {
 ; CHECK-P9-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; CHECK-P9-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P9-NEXT:    std r0, 16(r1)
 ; CHECK-P9-NEXT:    stdu r1, -80(r1)
 ; CHECK-P9-NEXT:    mr r30, r4
 ; CHECK-P9-NEXT:    mr r29, r3
 ; CHECK-P9-NEXT:    lis r3, 21845
+; CHECK-P9-NEXT:    std r0, 96(r1)
 ; CHECK-P9-NEXT:    add r28, r30, r29
 ; CHECK-P9-NEXT:    ori r27, r3, 21846
 ; CHECK-P9-NEXT:    b .LBB1_4

diff  --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
index 4042eba402ff7..d301f0172866e 100644
--- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
+++ b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
@@ -150,9 +150,9 @@ define i1 @length2_eq_nobuiltin_attr(ptr %X, ptr %Y) nounwind {
 ; CHECK-LABEL: length2_eq_nobuiltin_attr:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
 ; CHECK-NEXT:    li 5, 2
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    bl memcmp
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    cntlzw 3, 3

diff  --git a/llvm/test/CodeGen/PowerPC/no-duplicate.ll b/llvm/test/CodeGen/PowerPC/no-duplicate.ll
index 932ef1aa106b7..e4e0640bde542 100644
--- a/llvm/test/CodeGen/PowerPC/no-duplicate.ll
+++ b/llvm/test/CodeGen/PowerPC/no-duplicate.ll
@@ -6,6 +6,7 @@ target triple = "powerpc64le-grtev4-linux-gnu"
 define void @no_duplicate1(i64 %a) {
 ; CHECK-LABEL: no_duplicate1
 ; CHECK:        mr 30, 3
+; CHECK-NEXT:   std 0, 64(1)
 ; CHECK-NEXT:   b .LBB0_2
 
 ; CHECK:      .LBB0_2:
@@ -39,6 +40,7 @@ end:
 define void @no_duplicate2(i64 %a) {
 ; CHECK-LABEL: no_duplicate2
 ; CHECK:        mr 30, 3
+; CHECK-NEXT:   std 0, 64(1)
 ; CHECK-NEXT:   b .LBB1_2
 
 ; CHECK:      .LBB1_2:

diff  --git a/llvm/test/CodeGen/PowerPC/not-fixed-frame-object.ll b/llvm/test/CodeGen/PowerPC/not-fixed-frame-object.ll
index ffb54edb68a8c..e7778e7a4aa5d 100644
--- a/llvm/test/CodeGen/PowerPC/not-fixed-frame-object.ll
+++ b/llvm/test/CodeGen/PowerPC/not-fixed-frame-object.ll
@@ -44,11 +44,11 @@ define dso_local signext i32 @caller(i32 signext %a, i32 signext %b, i32 signext
 ; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r31, -8(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -192(r1)
+; CHECK-NEXT:    std r0, 208(r1)
 ; CHECK-NEXT:    std r5, 32(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r3, 40(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    mr r0, r4
+; CHECK-NEXT:    std r3, 40(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    ld r3, 40(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    add r3, r3, r0

diff  --git a/llvm/test/CodeGen/PowerPC/out-of-range-dform.ll b/llvm/test/CodeGen/PowerPC/out-of-range-dform.ll
index 5168944a2d857..aa717bc48838f 100644
--- a/llvm/test/CodeGen/PowerPC/out-of-range-dform.ll
+++ b/llvm/test/CodeGen/PowerPC/out-of-range-dform.ll
@@ -9,8 +9,8 @@ define dso_local void @main() local_unnamed_addr personality ptr @__gxx_personal
 ; CHECK-P9-LABEL: main:
 ; CHECK-P9:       # %bb.0: # %bb
 ; CHECK-P9-NEXT:    mflr r0
-; CHECK-P9-NEXT:    std r0, 16(r1)
 ; CHECK-P9-NEXT:    stdu r1, -32(r1)
+; CHECK-P9-NEXT:    std r0, 48(r1)
 ; CHECK-P9-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P9-NEXT:    .cfi_offset lr, 16
 ; CHECK-P9-NEXT:    bl malloc

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll b/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll
index 7512d1189eb4a..2063f511afc65 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel_ldst.ll
@@ -2490,8 +2490,8 @@ define dso_local void @store_f128_to_uint(fp128 %str) local_unnamed_addr #0 {
 ; CHECK-P8-LE-LABEL: store_f128_to_uint:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    mflr r0
-; CHECK-P8-LE-NEXT:    std r0, 16(r1)
 ; CHECK-P8-LE-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-LE-NEXT:    std r0, 48(r1)
 ; CHECK-P8-LE-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-LE-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-LE-NEXT:    bl __fixunskfdi
@@ -2506,8 +2506,8 @@ define dso_local void @store_f128_to_uint(fp128 %str) local_unnamed_addr #0 {
 ; CHECK-P8-BE-LABEL: store_f128_to_uint:
 ; CHECK-P8-BE:       # %bb.0: # %entry
 ; CHECK-P8-BE-NEXT:    mflr r0
-; CHECK-P8-BE-NEXT:    std r0, 16(r1)
 ; CHECK-P8-BE-NEXT:    stdu r1, -112(r1)
+; CHECK-P8-BE-NEXT:    std r0, 128(r1)
 ; CHECK-P8-BE-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-P8-BE-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-BE-NEXT:    bl __fixunskfdi
@@ -2551,8 +2551,8 @@ define dso_local void @store_f128_to_sint(fp128 %str) local_unnamed_addr #0 {
 ; CHECK-P8-LE-LABEL: store_f128_to_sint:
 ; CHECK-P8-LE:       # %bb.0: # %entry
 ; CHECK-P8-LE-NEXT:    mflr r0
-; CHECK-P8-LE-NEXT:    std r0, 16(r1)
 ; CHECK-P8-LE-NEXT:    stdu r1, -32(r1)
+; CHECK-P8-LE-NEXT:    std r0, 48(r1)
 ; CHECK-P8-LE-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-P8-LE-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-LE-NEXT:    bl __fixkfdi
@@ -2567,8 +2567,8 @@ define dso_local void @store_f128_to_sint(fp128 %str) local_unnamed_addr #0 {
 ; CHECK-P8-BE-LABEL: store_f128_to_sint:
 ; CHECK-P8-BE:       # %bb.0: # %entry
 ; CHECK-P8-BE-NEXT:    mflr r0
-; CHECK-P8-BE-NEXT:    std r0, 16(r1)
 ; CHECK-P8-BE-NEXT:    stdu r1, -112(r1)
+; CHECK-P8-BE-NEXT:    std r0, 128(r1)
 ; CHECK-P8-BE-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-P8-BE-NEXT:    .cfi_offset lr, 16
 ; CHECK-P8-BE-NEXT:    bl __fixkfdi

diff  --git a/llvm/test/CodeGen/PowerPC/pow-025-075-intrinsic-scalar-mass-fast.ll b/llvm/test/CodeGen/PowerPC/pow-025-075-intrinsic-scalar-mass-fast.ll
index 05f732488a067..de1a92d8b44f5 100644
--- a/llvm/test/CodeGen/PowerPC/pow-025-075-intrinsic-scalar-mass-fast.ll
+++ b/llvm/test/CodeGen/PowerPC/pow-025-075-intrinsic-scalar-mass-fast.ll
@@ -40,9 +40,9 @@ define float @llvmintr_powf_f32_fast025(float %a) #1 {
 ; CHECK-AIX-LABEL: llvmintr_powf_f32_fast025:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C0(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_powf_finite[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -104,9 +104,9 @@ define double @llvmintr_pow_f64_fast025(double %a) #1 {
 ; CHECK-AIX-LABEL: llvmintr_pow_f64_fast025:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C1(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_pow_finite[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -155,9 +155,9 @@ define float @llvmintr_powf_f32_fast075(float %a) #1 {
 ; CHECK-AIX-LABEL: llvmintr_powf_f32_fast075:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C2(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_powf_finite[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -221,9 +221,9 @@ define double @llvmintr_pow_f64_fast075(double %a) #1 {
 ; CHECK-AIX-LABEL: llvmintr_pow_f64_fast075:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C3(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_pow_finite[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -242,8 +242,8 @@ define float @llvmintr_powf_f32_fast050(float %a) #1 {
 ; CHECK-LNX-LABEL: llvmintr_powf_f32_fast050:
 ; CHECK-LNX:       # %bb.0: # %entry
 ; CHECK-LNX-NEXT:    mflr 0
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -32(1)
+; CHECK-LNX-NEXT:    std 0, 48(1)
 ; CHECK-LNX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
@@ -258,9 +258,9 @@ define float @llvmintr_powf_f32_fast050(float %a) #1 {
 ; CHECK-AIX-LABEL: llvmintr_powf_f32_fast050:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C4(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_powf_finite[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -279,8 +279,8 @@ define double @llvmintr_pow_f64_fast050(double %a) #1 {
 ; CHECK-LNX-LABEL: llvmintr_pow_f64_fast050:
 ; CHECK-LNX:       # %bb.0: # %entry
 ; CHECK-LNX-NEXT:    mflr 0
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -32(1)
+; CHECK-LNX-NEXT:    std 0, 48(1)
 ; CHECK-LNX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
@@ -295,9 +295,9 @@ define double @llvmintr_pow_f64_fast050(double %a) #1 {
 ; CHECK-AIX-LABEL: llvmintr_pow_f64_fast050:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C5(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_pow_finite[PR]
 ; CHECK-AIX-NEXT:    nop

diff  --git a/llvm/test/CodeGen/PowerPC/pow-025-075-nointrinsic-scalar-mass-fast.ll b/llvm/test/CodeGen/PowerPC/pow-025-075-nointrinsic-scalar-mass-fast.ll
index 70441d5517d9c..c43bccc3f2398 100644
--- a/llvm/test/CodeGen/PowerPC/pow-025-075-nointrinsic-scalar-mass-fast.ll
+++ b/llvm/test/CodeGen/PowerPC/pow-025-075-nointrinsic-scalar-mass-fast.ll
@@ -13,8 +13,8 @@ define float @powf_f32_fast025(float %a) #1 {
 ; CHECK-LNX-LABEL: powf_f32_fast025:
 ; CHECK-LNX:       # %bb.0: # %entry
 ; CHECK-LNX-NEXT:    mflr 0
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -32(1)
+; CHECK-LNX-NEXT:    std 0, 48(1)
 ; CHECK-LNX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
@@ -29,9 +29,9 @@ define float @powf_f32_fast025(float %a) #1 {
 ; CHECK-AIX-LABEL: powf_f32_fast025:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C0(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_powf_finite[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -50,8 +50,8 @@ define double @pow_f64_fast025(double %a) #1 {
 ; CHECK-LNX-LABEL: pow_f64_fast025:
 ; CHECK-LNX:       # %bb.0: # %entry
 ; CHECK-LNX-NEXT:    mflr 0
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -32(1)
+; CHECK-LNX-NEXT:    std 0, 48(1)
 ; CHECK-LNX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
@@ -66,9 +66,9 @@ define double @pow_f64_fast025(double %a) #1 {
 ; CHECK-AIX-LABEL: pow_f64_fast025:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C1(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_pow_finite[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -87,8 +87,8 @@ define float @powf_f32_fast075(float %a) #1 {
 ; CHECK-LNX-LABEL: powf_f32_fast075:
 ; CHECK-LNX:       # %bb.0: # %entry
 ; CHECK-LNX-NEXT:    mflr 0
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -32(1)
+; CHECK-LNX-NEXT:    std 0, 48(1)
 ; CHECK-LNX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
@@ -103,9 +103,9 @@ define float @powf_f32_fast075(float %a) #1 {
 ; CHECK-AIX-LABEL: powf_f32_fast075:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C2(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_powf_finite[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -124,8 +124,8 @@ define double @pow_f64_fast075(double %a) #1 {
 ; CHECK-LNX-LABEL: pow_f64_fast075:
 ; CHECK-LNX:       # %bb.0: # %entry
 ; CHECK-LNX-NEXT:    mflr 0
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -32(1)
+; CHECK-LNX-NEXT:    std 0, 48(1)
 ; CHECK-LNX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
@@ -140,9 +140,9 @@ define double @pow_f64_fast075(double %a) #1 {
 ; CHECK-AIX-LABEL: pow_f64_fast075:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C3(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_pow_finite[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -161,8 +161,8 @@ define float @powf_f32_fast050(float %a) #1 {
 ; CHECK-LNX-LABEL: powf_f32_fast050:
 ; CHECK-LNX:       # %bb.0: # %entry
 ; CHECK-LNX-NEXT:    mflr 0
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -32(1)
+; CHECK-LNX-NEXT:    std 0, 48(1)
 ; CHECK-LNX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
@@ -177,9 +177,9 @@ define float @powf_f32_fast050(float %a) #1 {
 ; CHECK-AIX-LABEL: powf_f32_fast050:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C4(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_powf_finite[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -198,8 +198,8 @@ define double @pow_f64_fast050(double %a) #1 {
 ; CHECK-LNX-LABEL: pow_f64_fast050:
 ; CHECK-LNX:       # %bb.0: # %entry
 ; CHECK-LNX-NEXT:    mflr 0
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -32(1)
+; CHECK-LNX-NEXT:    std 0, 48(1)
 ; CHECK-LNX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
@@ -214,9 +214,9 @@ define double @pow_f64_fast050(double %a) #1 {
 ; CHECK-AIX-LABEL: pow_f64_fast050:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C5(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_pow_finite[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -237,8 +237,8 @@ define float @__powf_finite_f32_fast025(float %a) #1 {
 ; CHECK-LNX-LABEL: __powf_finite_f32_fast025:
 ; CHECK-LNX:       # %bb.0: # %entry
 ; CHECK-LNX-NEXT:    mflr 0
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -32(1)
+; CHECK-LNX-NEXT:    std 0, 48(1)
 ; CHECK-LNX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
@@ -253,9 +253,9 @@ define float @__powf_finite_f32_fast025(float %a) #1 {
 ; CHECK-AIX-LABEL: __powf_finite_f32_fast025:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C6(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_powf_finite[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -274,8 +274,8 @@ define double @__pow_finite_f64_fast025(double %a) #1 {
 ; CHECK-LNX-LABEL: __pow_finite_f64_fast025:
 ; CHECK-LNX:       # %bb.0: # %entry
 ; CHECK-LNX-NEXT:    mflr 0
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -32(1)
+; CHECK-LNX-NEXT:    std 0, 48(1)
 ; CHECK-LNX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
@@ -290,9 +290,9 @@ define double @__pow_finite_f64_fast025(double %a) #1 {
 ; CHECK-AIX-LABEL: __pow_finite_f64_fast025:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C7(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_pow_finite[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -311,8 +311,8 @@ define float @__powf_finite_f32_fast075(float %a) #1 {
 ; CHECK-LNX-LABEL: __powf_finite_f32_fast075:
 ; CHECK-LNX:       # %bb.0: # %entry
 ; CHECK-LNX-NEXT:    mflr 0
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -32(1)
+; CHECK-LNX-NEXT:    std 0, 48(1)
 ; CHECK-LNX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
@@ -327,9 +327,9 @@ define float @__powf_finite_f32_fast075(float %a) #1 {
 ; CHECK-AIX-LABEL: __powf_finite_f32_fast075:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C8(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_powf_finite[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -348,8 +348,8 @@ define double @__pow_finite_f64_fast075(double %a) #1 {
 ; CHECK-LNX-LABEL: __pow_finite_f64_fast075:
 ; CHECK-LNX:       # %bb.0: # %entry
 ; CHECK-LNX-NEXT:    mflr 0
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -32(1)
+; CHECK-LNX-NEXT:    std 0, 48(1)
 ; CHECK-LNX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
@@ -364,9 +364,9 @@ define double @__pow_finite_f64_fast075(double %a) #1 {
 ; CHECK-AIX-LABEL: __pow_finite_f64_fast075:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C9(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_pow_finite[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -385,8 +385,8 @@ define float @__powf_finite_f32_fast050(float %a) #1 {
 ; CHECK-LNX-LABEL: __powf_finite_f32_fast050:
 ; CHECK-LNX:       # %bb.0: # %entry
 ; CHECK-LNX-NEXT:    mflr 0
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -32(1)
+; CHECK-LNX-NEXT:    std 0, 48(1)
 ; CHECK-LNX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
@@ -401,9 +401,9 @@ define float @__powf_finite_f32_fast050(float %a) #1 {
 ; CHECK-AIX-LABEL: __powf_finite_f32_fast050:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C10(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_powf_finite[PR]
 ; CHECK-AIX-NEXT:    nop
@@ -422,8 +422,8 @@ define double @__pow_finite_f64_fast050(double %a) #1 {
 ; CHECK-LNX-LABEL: __pow_finite_f64_fast050:
 ; CHECK-LNX:       # %bb.0: # %entry
 ; CHECK-LNX-NEXT:    mflr 0
-; CHECK-LNX-NEXT:    std 0, 16(1)
 ; CHECK-LNX-NEXT:    stdu 1, -32(1)
+; CHECK-LNX-NEXT:    std 0, 48(1)
 ; CHECK-LNX-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-LNX-NEXT:    .cfi_offset lr, 16
 ; CHECK-LNX-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
@@ -438,9 +438,9 @@ define double @__pow_finite_f64_fast050(double %a) #1 {
 ; CHECK-AIX-LABEL: __pow_finite_f64_fast050:
 ; CHECK-AIX:       # %bb.0: # %entry
 ; CHECK-AIX-NEXT:    mflr 0
-; CHECK-AIX-NEXT:    stw 0, 8(1)
 ; CHECK-AIX-NEXT:    stwu 1, -64(1)
 ; CHECK-AIX-NEXT:    lwz 3, L..C11(2) # %const.0
+; CHECK-AIX-NEXT:    stw 0, 72(1)
 ; CHECK-AIX-NEXT:    lfs 2, 0(3)
 ; CHECK-AIX-NEXT:    bl .__xl_pow_finite[PR]
 ; CHECK-AIX-NEXT:    nop

diff  --git a/llvm/test/CodeGen/PowerPC/ppc-prologue.ll b/llvm/test/CodeGen/PowerPC/ppc-prologue.ll
index 289121c1bcd2c..745d0c449bc95 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-prologue.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-prologue.ll
@@ -2,9 +2,9 @@
 
 define i32 @_Z4funci(i32 %a) ssp {
 ; CHECK:       mflr 0
-; CHECK-NEXT:  stw 0, 4(1)
 ; CHECK-NEXT:  stwu 1, -32(1)
 ; CHECK-NEXT:  stw 31, 28(1)
+; CHECK-NEXT:  stw 0, 36(1)
 ; CHECK:  mr 31, 1
 entry:
   %a_addr = alloca i32                            ; <ptr> [#uses=2]

diff  --git a/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll b/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
index 5a4aa4385e640..aa1f469f9e394 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
@@ -3,7 +3,7 @@
 ; RUN: llc -mtriple=powerpc-ibm-aix-xcoff -mattr=-altivec -mcpu=pwr8 %s -o - -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,ENABLE,CHECK-32,ENABLE-32
 ; RUN: llc -mtriple=powerpc-ibm-aix-xcoff %s -o - -enable-shrink-wrap=false -verify-machineinstrs |  FileCheck %s --check-prefixes=CHECK,DISABLE,CHECK-32,DISABLE-32
 ; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff -mattr=-altivec -mcpu=pwr8 %s -o - -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,ENABLE,CHECK-64,ENABLE-64
-; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff %s -o - -enable-shrink-wrap=false -verify-machineinstrs |  FileCheck %s --check-prefixes=CHECK,DISABLE,CHECK-64,DISABLE-64
+; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff %s -o - -enable-shrink-wrap=false -verify-machineinstrs |  FileCheck %s --check-prefixes=CHECK,DISABLE,CHECK-64,DISABLE-64,DISABLE-64-AIX
 ;
 ;
 ; Note: Lots of tests use inline asm instead of regular calls.
@@ -30,6 +30,8 @@
 ; Compare the arguments and jump to exit.
 ; After the prologue is set.
 ; DISABLE: cmpw 3, 4
+; DISABLE-32: stw 0,
+; DISABLE-64-AIX: std 0, 
 ; DISABLE-NEXT: bge 0, {{.*}}[[EXIT_LABEL:BB[0-9_]+]]
 ;
 ; Store %a on the stack
@@ -478,6 +480,8 @@ if.end:                                           ; preds = %for.body, %if.else
 ; CHECK: mflr {{[0-9]+}}
 ;
 ; DISABLE: cmplwi 3, 0
+; DISABLE-32: stw 0, 72(1)
+; DISABLE-64-AIX: std 0,
 ; DISABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
 ;
 ; Setup of the varags.
@@ -494,6 +498,7 @@ if.end:                                           ; preds = %for.body, %if.else
 ; CHECK-32-NEXT: mr 7, 4
 ; CHECK-32-NEXT: mr 8, 4
 ; CHECK-32-NEXT: mr 9, 4
+; ENABLE-32-NEXT: stw 0, 72(1)
 ;
 ; CHECK-NEXT: bl {{.*}}someVariadicFunc
 ; CHECK: slwi 3, 3, 3
@@ -539,6 +544,8 @@ declare i32 @someVariadicFunc(i32, ...)
 ; DISABLE: mflr {{[0-9]+}}
 ;
 ; CHECK: cmplwi 3, 0
+; DISABLE-32: stw 0, 72(1)
+; DISABLE-64-AIX: std 0,
 ; CHECK-NEXT: bne{{[-]?}} 0, {{.*}}[[ABORT:BB[0-9_]+]]
 ;
 ; CHECK: li 3, 42

diff  --git a/llvm/test/CodeGen/PowerPC/ppc32-nest.ll b/llvm/test/CodeGen/PowerPC/ppc32-nest.ll
index e581fd548654a..2e3790d6b282a 100644
--- a/llvm/test/CodeGen/PowerPC/ppc32-nest.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc32-nest.ll
@@ -17,6 +17,7 @@ define ptr @nest_receiver(ptr nest %arg) nounwind {
 define ptr @nest_caller(ptr %arg) nounwind {
 ; CHECK-LABEL: nest_caller:
 ; CHECK: mr 11, 3
+; CHECK: stw 0, 20(1)
 ; CHECK-NEXT: bl nest_receiver
 ; CHECK: blr
 

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
index 9cb61a3d457fe..533439ff8af8a 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
@@ -1243,8 +1243,8 @@ define i64 @setbf128(fp128 %a, fp128 %b) {
 ; CHECK-PWR8-LABEL: setbf128:
 ; CHECK-PWR8:       # %bb.0:
 ; CHECK-PWR8-NEXT:    mflr r0
-; CHECK-PWR8-NEXT:    std r0, 16(r1)
 ; CHECK-PWR8-NEXT:    stdu r1, -96(r1)
+; CHECK-PWR8-NEXT:    std r0, 112(r1)
 ; CHECK-PWR8-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-PWR8-NEXT:    .cfi_offset lr, 16
 ; CHECK-PWR8-NEXT:    .cfi_offset r30, -16

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll b/llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll
index 4f79f9293bb91..429b8774f6ec3 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-byval-larger-struct.ll
@@ -16,8 +16,8 @@ define signext i8 @caller_9(ptr nocapture readonly byval([9 x i8]) %data) #0 {
 ; P8LE-LABEL: caller_9:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    mflr r0
-; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -80(r1)
+; P8LE-NEXT:    std r0, 96(r1)
 ; P8LE-NEXT:    stb r4, 56(r1)
 ; P8LE-NEXT:    addi r5, r1, 71
 ; P8LE-NEXT:    std r3, 48(r1)
@@ -36,12 +36,12 @@ define signext i8 @caller_9(ptr nocapture readonly byval([9 x i8]) %data) #0 {
 ; P9LE-LABEL: caller_9:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    mflr r0
-; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -80(r1)
+; P9LE-NEXT:    std r0, 96(r1)
 ; P9LE-NEXT:    stb r4, 56(r1)
 ; P9LE-NEXT:    addi r4, r1, 71
-; P9LE-NEXT:    std r3, 48(r1)
 ; P9LE-NEXT:    lbz r5, 56(r1)
+; P9LE-NEXT:    std r3, 48(r1)
 ; P9LE-NEXT:    stdx r3, 0, r4
 ; P9LE-NEXT:    mr r3, r4
 ; P9LE-NEXT:    stb r5, 79(r1)
@@ -75,8 +75,8 @@ define signext i8 @caller_9(ptr nocapture readonly byval([9 x i8]) %data) #0 {
 ; P8BE-LABEL: caller_9:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
-; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -144(r1)
+; P8BE-NEXT:    std r0, 160(r1)
 ; P8BE-NEXT:    stb r4, 200(r1)
 ; P8BE-NEXT:    addi r5, r1, 135
 ; P8BE-NEXT:    std r3, 192(r1)
@@ -95,12 +95,12 @@ define signext i8 @caller_9(ptr nocapture readonly byval([9 x i8]) %data) #0 {
 ; P9BE-LABEL: caller_9:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    mflr r0
-; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -144(r1)
+; P9BE-NEXT:    std r0, 160(r1)
 ; P9BE-NEXT:    stb r4, 200(r1)
 ; P9BE-NEXT:    addi r4, r1, 135
-; P9BE-NEXT:    std r3, 192(r1)
 ; P9BE-NEXT:    lbz r5, 200(r1)
+; P9BE-NEXT:    std r3, 192(r1)
 ; P9BE-NEXT:    stdx r3, 0, r4
 ; P9BE-NEXT:    mr r3, r4
 ; P9BE-NEXT:    stb r5, 143(r1)
@@ -175,16 +175,16 @@ define signext i8 @caller_9_callee_9(ptr nocapture readonly byval([9 x i8]) %dat
 ; P8LE-LABEL: caller_9_callee_9:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    mflr r0
-; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -80(r1)
+; P8LE-NEXT:    std r0, 96(r1)
 ; P8LE-NEXT:    stb r4, 56(r1)
 ; P8LE-NEXT:    addi r5, r1, 71
 ; P8LE-NEXT:    std r3, 48(r1)
 ; P8LE-NEXT:    lbz r4, 56(r1)
 ; P8LE-NEXT:    stdx r3, 0, r5
-; P8LE-NEXT:    ld r3, 48(r1)
 ; P8LE-NEXT:    stb r4, 79(r1)
 ; P8LE-NEXT:    lbz r4, 56(r1)
+; P8LE-NEXT:    ld r3, 48(r1)
 ; P8LE-NEXT:    bl callee_9
 ; P8LE-NEXT:    nop
 ; P8LE-NEXT:    li r3, 0
@@ -196,12 +196,12 @@ define signext i8 @caller_9_callee_9(ptr nocapture readonly byval([9 x i8]) %dat
 ; P9LE-LABEL: caller_9_callee_9:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    mflr r0
-; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -80(r1)
+; P9LE-NEXT:    std r0, 96(r1)
 ; P9LE-NEXT:    stb r4, 56(r1)
 ; P9LE-NEXT:    addi r4, r1, 71
-; P9LE-NEXT:    std r3, 48(r1)
 ; P9LE-NEXT:    lbz r5, 56(r1)
+; P9LE-NEXT:    std r3, 48(r1)
 ; P9LE-NEXT:    stdx r3, 0, r4
 ; P9LE-NEXT:    lbz r4, 56(r1)
 ; P9LE-NEXT:    ld r3, 48(r1)
@@ -237,16 +237,16 @@ define signext i8 @caller_9_callee_9(ptr nocapture readonly byval([9 x i8]) %dat
 ; P8BE-LABEL: caller_9_callee_9:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
-; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -144(r1)
+; P8BE-NEXT:    std r0, 160(r1)
 ; P8BE-NEXT:    stb r4, 200(r1)
 ; P8BE-NEXT:    addi r5, r1, 135
 ; P8BE-NEXT:    std r3, 192(r1)
 ; P8BE-NEXT:    lbz r4, 200(r1)
 ; P8BE-NEXT:    stdx r3, 0, r5
-; P8BE-NEXT:    ld r3, 192(r1)
 ; P8BE-NEXT:    stb r4, 143(r1)
 ; P8BE-NEXT:    lbz r4, 200(r1)
+; P8BE-NEXT:    ld r3, 192(r1)
 ; P8BE-NEXT:    bl callee_9
 ; P8BE-NEXT:    nop
 ; P8BE-NEXT:    li r3, 0
@@ -258,12 +258,12 @@ define signext i8 @caller_9_callee_9(ptr nocapture readonly byval([9 x i8]) %dat
 ; P9BE-LABEL: caller_9_callee_9:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    mflr r0
-; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -144(r1)
+; P9BE-NEXT:    std r0, 160(r1)
 ; P9BE-NEXT:    stb r4, 200(r1)
 ; P9BE-NEXT:    addi r4, r1, 135
-; P9BE-NEXT:    std r3, 192(r1)
 ; P9BE-NEXT:    lbz r5, 200(r1)
+; P9BE-NEXT:    std r3, 192(r1)
 ; P9BE-NEXT:    stdx r3, 0, r4
 ; P9BE-NEXT:    lbz r4, 200(r1)
 ; P9BE-NEXT:    ld r3, 192(r1)
@@ -340,8 +340,8 @@ define signext i8 @caller_10(ptr nocapture readonly byval([10 x i8]) %data) #0 {
 ; P8LE-LABEL: caller_10:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    mflr r0
-; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -80(r1)
+; P8LE-NEXT:    std r0, 96(r1)
 ; P8LE-NEXT:    sth r4, 56(r1)
 ; P8LE-NEXT:    addi r5, r1, 70
 ; P8LE-NEXT:    std r3, 48(r1)
@@ -360,12 +360,12 @@ define signext i8 @caller_10(ptr nocapture readonly byval([10 x i8]) %data) #0 {
 ; P9LE-LABEL: caller_10:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    mflr r0
-; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -80(r1)
+; P9LE-NEXT:    std r0, 96(r1)
 ; P9LE-NEXT:    sth r4, 56(r1)
 ; P9LE-NEXT:    addi r4, r1, 70
-; P9LE-NEXT:    std r3, 48(r1)
 ; P9LE-NEXT:    lhz r5, 56(r1)
+; P9LE-NEXT:    std r3, 48(r1)
 ; P9LE-NEXT:    stdx r3, 0, r4
 ; P9LE-NEXT:    mr r3, r4
 ; P9LE-NEXT:    sth r5, 78(r1)
@@ -399,8 +399,8 @@ define signext i8 @caller_10(ptr nocapture readonly byval([10 x i8]) %data) #0 {
 ; P8BE-LABEL: caller_10:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
-; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -144(r1)
+; P8BE-NEXT:    std r0, 160(r1)
 ; P8BE-NEXT:    sth r4, 200(r1)
 ; P8BE-NEXT:    addi r5, r1, 134
 ; P8BE-NEXT:    std r3, 192(r1)
@@ -419,12 +419,12 @@ define signext i8 @caller_10(ptr nocapture readonly byval([10 x i8]) %data) #0 {
 ; P9BE-LABEL: caller_10:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    mflr r0
-; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -144(r1)
+; P9BE-NEXT:    std r0, 160(r1)
 ; P9BE-NEXT:    sth r4, 200(r1)
 ; P9BE-NEXT:    addi r4, r1, 134
-; P9BE-NEXT:    std r3, 192(r1)
 ; P9BE-NEXT:    lhz r5, 200(r1)
+; P9BE-NEXT:    std r3, 192(r1)
 ; P9BE-NEXT:    stdx r3, 0, r4
 ; P9BE-NEXT:    mr r3, r4
 ; P9BE-NEXT:    sth r5, 142(r1)
@@ -503,8 +503,8 @@ define signext i8 @caller_12(ptr nocapture readonly byval([12 x i8]) %data) #0 {
 ; P8LE-LABEL: caller_12:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    mflr r0
-; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -80(r1)
+; P8LE-NEXT:    std r0, 96(r1)
 ; P8LE-NEXT:    stw r4, 56(r1)
 ; P8LE-NEXT:    addi r5, r1, 68
 ; P8LE-NEXT:    std r3, 48(r1)
@@ -523,12 +523,12 @@ define signext i8 @caller_12(ptr nocapture readonly byval([12 x i8]) %data) #0 {
 ; P9LE-LABEL: caller_12:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    mflr r0
-; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -80(r1)
+; P9LE-NEXT:    std r0, 96(r1)
 ; P9LE-NEXT:    stw r4, 56(r1)
 ; P9LE-NEXT:    addi r4, r1, 68
-; P9LE-NEXT:    std r3, 48(r1)
 ; P9LE-NEXT:    lwz r5, 56(r1)
+; P9LE-NEXT:    std r3, 48(r1)
 ; P9LE-NEXT:    std r3, 68(r1)
 ; P9LE-NEXT:    mr r3, r4
 ; P9LE-NEXT:    stw r5, 76(r1)
@@ -562,8 +562,8 @@ define signext i8 @caller_12(ptr nocapture readonly byval([12 x i8]) %data) #0 {
 ; P8BE-LABEL: caller_12:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
-; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -144(r1)
+; P8BE-NEXT:    std r0, 160(r1)
 ; P8BE-NEXT:    stw r4, 200(r1)
 ; P8BE-NEXT:    addi r5, r1, 132
 ; P8BE-NEXT:    std r3, 192(r1)
@@ -582,12 +582,12 @@ define signext i8 @caller_12(ptr nocapture readonly byval([12 x i8]) %data) #0 {
 ; P9BE-LABEL: caller_12:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    mflr r0
-; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -144(r1)
+; P9BE-NEXT:    std r0, 160(r1)
 ; P9BE-NEXT:    stw r4, 200(r1)
 ; P9BE-NEXT:    addi r4, r1, 132
-; P9BE-NEXT:    std r3, 192(r1)
 ; P9BE-NEXT:    lwz r5, 200(r1)
+; P9BE-NEXT:    std r3, 192(r1)
 ; P9BE-NEXT:    std r3, 132(r1)
 ; P9BE-NEXT:    mr r3, r4
 ; P9BE-NEXT:    stw r5, 140(r1)
@@ -674,8 +674,8 @@ define signext i8 @caller_14(ptr nocapture readonly byval([14 x i8]) %data) #0 {
 ; P8LE-LABEL: caller_14:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    mflr r0
-; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -80(r1)
+; P8LE-NEXT:    std r0, 96(r1)
 ; P8LE-NEXT:    stw r4, 56(r1)
 ; P8LE-NEXT:    addi r5, r1, 66
 ; P8LE-NEXT:    rldicl r4, r4, 32, 32
@@ -696,11 +696,11 @@ define signext i8 @caller_14(ptr nocapture readonly byval([14 x i8]) %data) #0 {
 ; P9LE-LABEL: caller_14:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    mflr r0
-; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -80(r1)
+; P9LE-NEXT:    std r0, 96(r1)
 ; P9LE-NEXT:    stw r4, 56(r1)
-; P9LE-NEXT:    rldicl r4, r4, 32, 32
 ; P9LE-NEXT:    lwz r5, 56(r1)
+; P9LE-NEXT:    rldicl r4, r4, 32, 32
 ; P9LE-NEXT:    std r3, 48(r1)
 ; P9LE-NEXT:    sth r4, 60(r1)
 ; P9LE-NEXT:    addi r4, r1, 66
@@ -739,16 +739,16 @@ define signext i8 @caller_14(ptr nocapture readonly byval([14 x i8]) %data) #0 {
 ; P8BE-LABEL: caller_14:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
-; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -144(r1)
 ; P8BE-NEXT:    rldicl r6, r4, 48, 16
+; P8BE-NEXT:    std r0, 160(r1)
 ; P8BE-NEXT:    addi r5, r1, 130
 ; P8BE-NEXT:    std r3, 192(r1)
-; P8BE-NEXT:    sth r4, 204(r1)
-; P8BE-NEXT:    stw r6, 200(r1)
 ; P8BE-NEXT:    stdx r3, 0, r5
 ; P8BE-NEXT:    mr r3, r5
+; P8BE-NEXT:    stw r6, 200(r1)
 ; P8BE-NEXT:    lwz r6, 200(r1)
+; P8BE-NEXT:    sth r4, 204(r1)
 ; P8BE-NEXT:    stw r6, 138(r1)
 ; P8BE-NEXT:    bl callee
 ; P8BE-NEXT:    nop
@@ -761,9 +761,9 @@ define signext i8 @caller_14(ptr nocapture readonly byval([14 x i8]) %data) #0 {
 ; P9BE-LABEL: caller_14:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    mflr r0
-; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -144(r1)
 ; P9BE-NEXT:    rldicl r5, r4, 48, 16
+; P9BE-NEXT:    std r0, 160(r1)
 ; P9BE-NEXT:    sth r4, 204(r1)
 ; P9BE-NEXT:    addi r4, r1, 130
 ; P9BE-NEXT:    std r3, 192(r1)
@@ -857,9 +857,9 @@ define signext i8 @caller_16(ptr nocapture readonly byval([16 x i8]) %data) #0 {
 ; P8LE-LABEL: caller_16:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    mflr r0
-; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -80(r1)
 ; P8LE-NEXT:    addi r5, r1, 64
+; P8LE-NEXT:    std r0, 96(r1)
 ; P8LE-NEXT:    std r3, 48(r1)
 ; P8LE-NEXT:    std r4, 56(r1)
 ; P8LE-NEXT:    std r3, 64(r1)
@@ -876,9 +876,9 @@ define signext i8 @caller_16(ptr nocapture readonly byval([16 x i8]) %data) #0 {
 ; P9LE-LABEL: caller_16:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    mflr r0
-; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -80(r1)
 ; P9LE-NEXT:    addi r5, r1, 64
+; P9LE-NEXT:    std r0, 96(r1)
 ; P9LE-NEXT:    std r3, 48(r1)
 ; P9LE-NEXT:    std r4, 56(r1)
 ; P9LE-NEXT:    stw r4, 72(r1)
@@ -913,9 +913,9 @@ define signext i8 @caller_16(ptr nocapture readonly byval([16 x i8]) %data) #0 {
 ; P8BE-LABEL: caller_16:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
-; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -144(r1)
 ; P8BE-NEXT:    addi r5, r1, 128
+; P8BE-NEXT:    std r0, 160(r1)
 ; P8BE-NEXT:    rldicl r6, r4, 32, 32
 ; P8BE-NEXT:    std r3, 192(r1)
 ; P8BE-NEXT:    std r4, 200(r1)
@@ -933,8 +933,8 @@ define signext i8 @caller_16(ptr nocapture readonly byval([16 x i8]) %data) #0 {
 ; P9BE-LABEL: caller_16:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    mflr r0
-; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -144(r1)
+; P9BE-NEXT:    std r0, 160(r1)
 ; P9BE-NEXT:    std r4, 200(r1)
 ; P9BE-NEXT:    rldicl r5, r4, 32, 32
 ; P9BE-NEXT:    addi r4, r1, 128
@@ -1025,14 +1025,14 @@ define signext i8 @caller_18(ptr nocapture readonly byval([18 x i8]) %data) #0 {
 ; P8LE-LABEL: caller_18:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    mflr r0
-; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -96(r1)
 ; P8LE-NEXT:    addi r6, r1, 78
+; P8LE-NEXT:    std r0, 112(r1)
 ; P8LE-NEXT:    std r3, 48(r1)
-; P8LE-NEXT:    sth r5, 64(r1)
 ; P8LE-NEXT:    std r4, 56(r1)
 ; P8LE-NEXT:    stdx r3, 0, r6
 ; P8LE-NEXT:    mr r3, r6
+; P8LE-NEXT:    sth r5, 64(r1)
 ; P8LE-NEXT:    stw r4, 86(r1)
 ; P8LE-NEXT:    bl callee
 ; P8LE-NEXT:    nop
@@ -1045,8 +1045,8 @@ define signext i8 @caller_18(ptr nocapture readonly byval([18 x i8]) %data) #0 {
 ; P9LE-LABEL: caller_18:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    mflr r0
-; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -96(r1)
+; P9LE-NEXT:    std r0, 112(r1)
 ; P9LE-NEXT:    sth r5, 64(r1)
 ; P9LE-NEXT:    addi r5, r1, 78
 ; P9LE-NEXT:    std r3, 48(r1)
@@ -1084,9 +1084,9 @@ define signext i8 @caller_18(ptr nocapture readonly byval([18 x i8]) %data) #0 {
 ; P8BE-LABEL: caller_18:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
-; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -144(r1)
 ; P8BE-NEXT:    addi r6, r1, 126
+; P8BE-NEXT:    std r0, 160(r1)
 ; P8BE-NEXT:    sth r5, 208(r1)
 ; P8BE-NEXT:    rldicl r5, r4, 32, 32
 ; P8BE-NEXT:    std r3, 192(r1)
@@ -1105,8 +1105,8 @@ define signext i8 @caller_18(ptr nocapture readonly byval([18 x i8]) %data) #0 {
 ; P9BE-LABEL: caller_18:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    mflr r0
-; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -144(r1)
+; P9BE-NEXT:    std r0, 160(r1)
 ; P9BE-NEXT:    std r4, 200(r1)
 ; P9BE-NEXT:    sth r5, 208(r1)
 ; P9BE-NEXT:    rldicl r5, r4, 32, 32

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll b/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll
index 8a258c4137032..42b19e5e96a68 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-byval-multi-store.ll
@@ -16,8 +16,8 @@ define signext i8 @caller_1(ptr nocapture readonly byval([1 x i8]) %data) #0 {
 ; P8LE-LABEL: caller_1:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    mflr r0
-; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -64(r1)
+; P8LE-NEXT:    std r0, 80(r1)
 ; P8LE-NEXT:    stb r3, 48(r1)
 ; P8LE-NEXT:    lbz r3, 48(r1)
 ; P8LE-NEXT:    stb r3, 63(r1)
@@ -33,8 +33,8 @@ define signext i8 @caller_1(ptr nocapture readonly byval([1 x i8]) %data) #0 {
 ; P9LE-LABEL: caller_1:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    mflr r0
-; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -64(r1)
+; P9LE-NEXT:    std r0, 80(r1)
 ; P9LE-NEXT:    stb r3, 48(r1)
 ; P9LE-NEXT:    lbz r3, 48(r1)
 ; P9LE-NEXT:    stb r3, 63(r1)
@@ -66,8 +66,8 @@ define signext i8 @caller_1(ptr nocapture readonly byval([1 x i8]) %data) #0 {
 ; P8BE-LABEL: caller_1:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
-; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -128(r1)
+; P8BE-NEXT:    std r0, 144(r1)
 ; P8BE-NEXT:    stb r3, 183(r1)
 ; P8BE-NEXT:    lbz r3, 183(r1)
 ; P8BE-NEXT:    stb r3, 127(r1)
@@ -83,8 +83,8 @@ define signext i8 @caller_1(ptr nocapture readonly byval([1 x i8]) %data) #0 {
 ; P9BE-LABEL: caller_1:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    mflr r0
-; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -128(r1)
+; P9BE-NEXT:    std r0, 144(r1)
 ; P9BE-NEXT:    stb r3, 183(r1)
 ; P9BE-NEXT:    lbz r3, 183(r1)
 ; P9BE-NEXT:    stb r3, 127(r1)
@@ -125,8 +125,8 @@ define signext i8 @caller_2(ptr nocapture readonly byval([2 x i8]) %data) #0 {
 ; P8LE-LABEL: caller_2:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    mflr r0
-; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -64(r1)
+; P8LE-NEXT:    std r0, 80(r1)
 ; P8LE-NEXT:    sth r3, 48(r1)
 ; P8LE-NEXT:    lhz r3, 48(r1)
 ; P8LE-NEXT:    sth r3, 62(r1)
@@ -142,8 +142,8 @@ define signext i8 @caller_2(ptr nocapture readonly byval([2 x i8]) %data) #0 {
 ; P9LE-LABEL: caller_2:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    mflr r0
-; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -64(r1)
+; P9LE-NEXT:    std r0, 80(r1)
 ; P9LE-NEXT:    sth r3, 48(r1)
 ; P9LE-NEXT:    lhz r3, 48(r1)
 ; P9LE-NEXT:    sth r3, 62(r1)
@@ -175,8 +175,8 @@ define signext i8 @caller_2(ptr nocapture readonly byval([2 x i8]) %data) #0 {
 ; P8BE-LABEL: caller_2:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
-; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -128(r1)
+; P8BE-NEXT:    std r0, 144(r1)
 ; P8BE-NEXT:    sth r3, 182(r1)
 ; P8BE-NEXT:    lhz r3, 182(r1)
 ; P8BE-NEXT:    sth r3, 126(r1)
@@ -192,8 +192,8 @@ define signext i8 @caller_2(ptr nocapture readonly byval([2 x i8]) %data) #0 {
 ; P9BE-LABEL: caller_2:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    mflr r0
-; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -128(r1)
+; P9BE-NEXT:    std r0, 144(r1)
 ; P9BE-NEXT:    sth r3, 182(r1)
 ; P9BE-NEXT:    lhz r3, 182(r1)
 ; P9BE-NEXT:    sth r3, 126(r1)
@@ -238,8 +238,8 @@ define signext i8 @caller_3(ptr nocapture readonly byval([3 x i8]) %data) #0 {
 ; P8LE-LABEL: caller_3:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    mflr r0
-; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -64(r1)
+; P8LE-NEXT:    std r0, 80(r1)
 ; P8LE-NEXT:    rldicl r4, r3, 48, 16
 ; P8LE-NEXT:    sth r3, 48(r1)
 ; P8LE-NEXT:    stb r4, 50(r1)
@@ -259,8 +259,8 @@ define signext i8 @caller_3(ptr nocapture readonly byval([3 x i8]) %data) #0 {
 ; P9LE-LABEL: caller_3:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    mflr r0
-; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -64(r1)
+; P9LE-NEXT:    std r0, 80(r1)
 ; P9LE-NEXT:    sth r3, 48(r1)
 ; P9LE-NEXT:    rldicl r3, r3, 48, 16
 ; P9LE-NEXT:    stb r3, 50(r1)
@@ -300,9 +300,9 @@ define signext i8 @caller_3(ptr nocapture readonly byval([3 x i8]) %data) #0 {
 ; P8BE-LABEL: caller_3:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
-; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -128(r1)
 ; P8BE-NEXT:    rldicl r4, r3, 56, 8
+; P8BE-NEXT:    std r0, 144(r1)
 ; P8BE-NEXT:    stb r3, 183(r1)
 ; P8BE-NEXT:    sth r4, 181(r1)
 ; P8BE-NEXT:    lbz r4, 183(r1)
@@ -321,9 +321,9 @@ define signext i8 @caller_3(ptr nocapture readonly byval([3 x i8]) %data) #0 {
 ; P9BE-LABEL: caller_3:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    mflr r0
-; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -128(r1)
 ; P9BE-NEXT:    rldicl r4, r3, 56, 8
+; P9BE-NEXT:    std r0, 144(r1)
 ; P9BE-NEXT:    stb r3, 183(r1)
 ; P9BE-NEXT:    sth r4, 181(r1)
 ; P9BE-NEXT:    lbz r4, 183(r1)
@@ -379,8 +379,8 @@ define signext i8 @caller_4(ptr nocapture readonly byval([4 x i8]) %data) #0 {
 ; P8LE-LABEL: caller_4:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    mflr r0
-; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -64(r1)
+; P8LE-NEXT:    std r0, 80(r1)
 ; P8LE-NEXT:    stw r3, 48(r1)
 ; P8LE-NEXT:    lwz r3, 48(r1)
 ; P8LE-NEXT:    stw r3, 60(r1)
@@ -396,8 +396,8 @@ define signext i8 @caller_4(ptr nocapture readonly byval([4 x i8]) %data) #0 {
 ; P9LE-LABEL: caller_4:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    mflr r0
-; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -64(r1)
+; P9LE-NEXT:    std r0, 80(r1)
 ; P9LE-NEXT:    stw r3, 48(r1)
 ; P9LE-NEXT:    lwz r3, 48(r1)
 ; P9LE-NEXT:    stw r3, 60(r1)
@@ -429,8 +429,8 @@ define signext i8 @caller_4(ptr nocapture readonly byval([4 x i8]) %data) #0 {
 ; P8BE-LABEL: caller_4:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
-; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -128(r1)
+; P8BE-NEXT:    std r0, 144(r1)
 ; P8BE-NEXT:    stw r3, 180(r1)
 ; P8BE-NEXT:    lwz r3, 180(r1)
 ; P8BE-NEXT:    stw r3, 124(r1)
@@ -446,8 +446,8 @@ define signext i8 @caller_4(ptr nocapture readonly byval([4 x i8]) %data) #0 {
 ; P9BE-LABEL: caller_4:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    mflr r0
-; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -128(r1)
+; P9BE-NEXT:    std r0, 144(r1)
 ; P9BE-NEXT:    stw r3, 180(r1)
 ; P9BE-NEXT:    lwz r3, 180(r1)
 ; P9BE-NEXT:    stw r3, 124(r1)
@@ -500,14 +500,14 @@ define signext i8 @caller_5(ptr nocapture readonly byval([5 x i8]) %data) #0 {
 ; P8LE-LABEL: caller_5:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    mflr r0
-; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -64(r1)
 ; P8LE-NEXT:    rldicl r4, r3, 32, 32
+; P8LE-NEXT:    std r0, 80(r1)
 ; P8LE-NEXT:    stw r3, 48(r1)
-; P8LE-NEXT:    stw r3, 59(r1)
-; P8LE-NEXT:    addi r3, r1, 59
 ; P8LE-NEXT:    stb r4, 52(r1)
 ; P8LE-NEXT:    lbz r4, 52(r1)
+; P8LE-NEXT:    stw r3, 59(r1)
+; P8LE-NEXT:    addi r3, r1, 59
 ; P8LE-NEXT:    stb r4, 63(r1)
 ; P8LE-NEXT:    bl callee
 ; P8LE-NEXT:    nop
@@ -520,9 +520,9 @@ define signext i8 @caller_5(ptr nocapture readonly byval([5 x i8]) %data) #0 {
 ; P9LE-LABEL: caller_5:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    mflr r0
-; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -64(r1)
 ; P9LE-NEXT:    rldicl r4, r3, 32, 32
+; P9LE-NEXT:    std r0, 80(r1)
 ; P9LE-NEXT:    stw r3, 48(r1)
 ; P9LE-NEXT:    stw r3, 59(r1)
 ; P9LE-NEXT:    addi r3, r1, 59
@@ -559,8 +559,8 @@ define signext i8 @caller_5(ptr nocapture readonly byval([5 x i8]) %data) #0 {
 ; P8BE-LABEL: caller_5:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
-; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -128(r1)
+; P8BE-NEXT:    std r0, 144(r1)
 ; P8BE-NEXT:    stb r3, 183(r1)
 ; P8BE-NEXT:    rldicl r3, r3, 56, 8
 ; P8BE-NEXT:    lbz r4, 183(r1)
@@ -579,11 +579,11 @@ define signext i8 @caller_5(ptr nocapture readonly byval([5 x i8]) %data) #0 {
 ; P9BE-LABEL: caller_5:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    mflr r0
-; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -128(r1)
+; P9BE-NEXT:    std r0, 144(r1)
 ; P9BE-NEXT:    stb r3, 183(r1)
-; P9BE-NEXT:    rldicl r3, r3, 56, 8
 ; P9BE-NEXT:    lbz r4, 183(r1)
+; P9BE-NEXT:    rldicl r3, r3, 56, 8
 ; P9BE-NEXT:    stw r3, 179(r1)
 ; P9BE-NEXT:    stw r3, 123(r1)
 ; P9BE-NEXT:    addi r3, r1, 123
@@ -643,8 +643,8 @@ define signext i8 @caller_6(ptr nocapture readonly byval([6 x i8]) %data) #0 {
 ; P8LE-LABEL: caller_6:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    mflr r0
-; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -64(r1)
+; P8LE-NEXT:    std r0, 80(r1)
 ; P8LE-NEXT:    rldicl r4, r3, 32, 32
 ; P8LE-NEXT:    stw r3, 48(r1)
 ; P8LE-NEXT:    sth r4, 52(r1)
@@ -664,8 +664,8 @@ define signext i8 @caller_6(ptr nocapture readonly byval([6 x i8]) %data) #0 {
 ; P9LE-LABEL: caller_6:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    mflr r0
-; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -64(r1)
+; P9LE-NEXT:    std r0, 80(r1)
 ; P9LE-NEXT:    stw r3, 48(r1)
 ; P9LE-NEXT:    rldicl r3, r3, 32, 32
 ; P9LE-NEXT:    sth r3, 52(r1)
@@ -705,9 +705,9 @@ define signext i8 @caller_6(ptr nocapture readonly byval([6 x i8]) %data) #0 {
 ; P8BE-LABEL: caller_6:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
-; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -128(r1)
 ; P8BE-NEXT:    rldicl r4, r3, 48, 16
+; P8BE-NEXT:    std r0, 144(r1)
 ; P8BE-NEXT:    sth r3, 182(r1)
 ; P8BE-NEXT:    stw r4, 178(r1)
 ; P8BE-NEXT:    lhz r4, 182(r1)
@@ -726,9 +726,9 @@ define signext i8 @caller_6(ptr nocapture readonly byval([6 x i8]) %data) #0 {
 ; P9BE-LABEL: caller_6:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    mflr r0
-; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -128(r1)
 ; P9BE-NEXT:    rldicl r4, r3, 48, 16
+; P9BE-NEXT:    std r0, 144(r1)
 ; P9BE-NEXT:    sth r3, 182(r1)
 ; P9BE-NEXT:    stw r4, 178(r1)
 ; P9BE-NEXT:    lhz r4, 182(r1)
@@ -796,17 +796,17 @@ define signext i8 @caller_7(ptr nocapture readonly byval([7 x i8]) %data) #0 {
 ; P8LE-LABEL: caller_7:
 ; P8LE:       # %bb.0: # %entry
 ; P8LE-NEXT:    mflr r0
-; P8LE-NEXT:    std r0, 16(r1)
 ; P8LE-NEXT:    stdu r1, -64(r1)
 ; P8LE-NEXT:    rldicl r4, r3, 32, 32
 ; P8LE-NEXT:    rldicl r5, r3, 16, 48
+; P8LE-NEXT:    std r0, 80(r1)
 ; P8LE-NEXT:    stw r3, 48(r1)
-; P8LE-NEXT:    stw r3, 57(r1)
-; P8LE-NEXT:    addi r3, r1, 57
 ; P8LE-NEXT:    sth r4, 52(r1)
 ; P8LE-NEXT:    stb r5, 54(r1)
 ; P8LE-NEXT:    lhz r4, 52(r1)
 ; P8LE-NEXT:    lbz r5, 54(r1)
+; P8LE-NEXT:    stw r3, 57(r1)
+; P8LE-NEXT:    addi r3, r1, 57
 ; P8LE-NEXT:    sth r4, 61(r1)
 ; P8LE-NEXT:    stb r5, 63(r1)
 ; P8LE-NEXT:    bl callee
@@ -820,9 +820,9 @@ define signext i8 @caller_7(ptr nocapture readonly byval([7 x i8]) %data) #0 {
 ; P9LE-LABEL: caller_7:
 ; P9LE:       # %bb.0: # %entry
 ; P9LE-NEXT:    mflr r0
-; P9LE-NEXT:    std r0, 16(r1)
 ; P9LE-NEXT:    stdu r1, -64(r1)
 ; P9LE-NEXT:    rldicl r4, r3, 32, 32
+; P9LE-NEXT:    std r0, 80(r1)
 ; P9LE-NEXT:    stw r3, 48(r1)
 ; P9LE-NEXT:    stw r3, 57(r1)
 ; P9LE-NEXT:    sth r4, 52(r1)
@@ -867,9 +867,9 @@ define signext i8 @caller_7(ptr nocapture readonly byval([7 x i8]) %data) #0 {
 ; P8BE-LABEL: caller_7:
 ; P8BE:       # %bb.0: # %entry
 ; P8BE-NEXT:    mflr r0
-; P8BE-NEXT:    std r0, 16(r1)
 ; P8BE-NEXT:    stdu r1, -128(r1)
 ; P8BE-NEXT:    rldicl r4, r3, 56, 8
+; P8BE-NEXT:    std r0, 144(r1)
 ; P8BE-NEXT:    stb r3, 183(r1)
 ; P8BE-NEXT:    rldicl r3, r3, 40, 24
 ; P8BE-NEXT:    sth r4, 181(r1)
@@ -878,8 +878,8 @@ define signext i8 @caller_7(ptr nocapture readonly byval([7 x i8]) %data) #0 {
 ; P8BE-NEXT:    stw r3, 177(r1)
 ; P8BE-NEXT:    stw r3, 121(r1)
 ; P8BE-NEXT:    addi r3, r1, 121
-; P8BE-NEXT:    stb r5, 127(r1)
 ; P8BE-NEXT:    sth r4, 125(r1)
+; P8BE-NEXT:    stb r5, 127(r1)
 ; P8BE-NEXT:    bl callee
 ; P8BE-NEXT:    nop
 ; P8BE-NEXT:    li r3, 0
@@ -891,17 +891,17 @@ define signext i8 @caller_7(ptr nocapture readonly byval([7 x i8]) %data) #0 {
 ; P9BE-LABEL: caller_7:
 ; P9BE:       # %bb.0: # %entry
 ; P9BE-NEXT:    mflr r0
-; P9BE-NEXT:    std r0, 16(r1)
 ; P9BE-NEXT:    stdu r1, -128(r1)
 ; P9BE-NEXT:    rldicl r4, r3, 56, 8
+; P9BE-NEXT:    std r0, 144(r1)
 ; P9BE-NEXT:    stb r3, 183(r1)
 ; P9BE-NEXT:    lbz r5, 183(r1)
 ; P9BE-NEXT:    rldicl r3, r3, 40, 24
 ; P9BE-NEXT:    sth r4, 181(r1)
 ; P9BE-NEXT:    stw r3, 177(r1)
+; P9BE-NEXT:    lhz r4, 181(r1)
 ; P9BE-NEXT:    stw r3, 121(r1)
 ; P9BE-NEXT:    addi r3, r1, 121
-; P9BE-NEXT:    lhz r4, 181(r1)
 ; P9BE-NEXT:    stb r5, 127(r1)
 ; P9BE-NEXT:    sth r4, 125(r1)
 ; P9BE-NEXT:    bl callee

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll b/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll
index 1eddb24ad5141..57bc882f6046e 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll
@@ -8,8 +8,8 @@ define dso_local void @ClobberLR() local_unnamed_addr #0 {
 ; PPC64LE-LABEL: ClobberLR:
 ; PPC64LE:       # %bb.0: # %entry
 ; PPC64LE-NEXT:    mflr r0
-; PPC64LE-NEXT:    std r0, 16(r1)
 ; PPC64LE-NEXT:    stdu r1, -32(r1)
+; PPC64LE-NEXT:    std r0, 48(r1)
 ; PPC64LE-NEXT:    #APP
 ; PPC64LE-NEXT:    #NO_APP
 ; PPC64LE-NEXT:    addi r1, r1, 32
@@ -20,8 +20,8 @@ define dso_local void @ClobberLR() local_unnamed_addr #0 {
 ; PPC64BE-LABEL: ClobberLR:
 ; PPC64BE:       # %bb.0: # %entry
 ; PPC64BE-NEXT:    mflr r0
-; PPC64BE-NEXT:    std r0, 16(r1)
 ; PPC64BE-NEXT:    stdu r1, -48(r1)
+; PPC64BE-NEXT:    std r0, 64(r1)
 ; PPC64BE-NEXT:    #APP
 ; PPC64BE-NEXT:    #NO_APP
 ; PPC64BE-NEXT:    addi r1, r1, 48
@@ -76,8 +76,8 @@ define dso_local signext i32 @ClobberLR_BR(i32 signext %in) #0 {
 ; PPC64LE-LABEL: ClobberLR_BR:
 ; PPC64LE:       # %bb.0: # %entry
 ; PPC64LE-NEXT:    mflr r0
-; PPC64LE-NEXT:    std r0, 16(r1)
 ; PPC64LE-NEXT:    stdu r1, -32(r1)
+; PPC64LE-NEXT:    std r0, 48(r1)
 ; PPC64LE-NEXT:    #APP
 ; PPC64LE-NEXT:    nop
 ; PPC64LE-NEXT:    #NO_APP
@@ -95,8 +95,8 @@ define dso_local signext i32 @ClobberLR_BR(i32 signext %in) #0 {
 ; PPC64BE-LABEL: ClobberLR_BR:
 ; PPC64BE:       # %bb.0: # %entry
 ; PPC64BE-NEXT:    mflr r0
-; PPC64BE-NEXT:    std r0, 16(r1)
 ; PPC64BE-NEXT:    stdu r1, -48(r1)
+; PPC64BE-NEXT:    std r0, 64(r1)
 ; PPC64BE-NEXT:    #APP
 ; PPC64BE-NEXT:    nop
 ; PPC64BE-NEXT:    #NO_APP
@@ -166,8 +166,8 @@ define dso_local void @DefLR() local_unnamed_addr #0 {
 ; PPC64LE-LABEL: DefLR:
 ; PPC64LE:       # %bb.0: # %entry
 ; PPC64LE-NEXT:    mflr r0
-; PPC64LE-NEXT:    std r0, 16(r1)
 ; PPC64LE-NEXT:    stdu r1, -32(r1)
+; PPC64LE-NEXT:    std r0, 48(r1)
 ; PPC64LE-NEXT:    #APP
 ; PPC64LE-NEXT:    #NO_APP
 ; PPC64LE-NEXT:    addi r1, r1, 32
@@ -178,8 +178,8 @@ define dso_local void @DefLR() local_unnamed_addr #0 {
 ; PPC64BE-LABEL: DefLR:
 ; PPC64BE:       # %bb.0: # %entry
 ; PPC64BE-NEXT:    mflr r0
-; PPC64BE-NEXT:    std r0, 16(r1)
 ; PPC64BE-NEXT:    stdu r1, -48(r1)
+; PPC64BE-NEXT:    std r0, 64(r1)
 ; PPC64BE-NEXT:    #APP
 ; PPC64BE-NEXT:    #NO_APP
 ; PPC64BE-NEXT:    addi r1, r1, 48
@@ -195,8 +195,8 @@ define dso_local void @EarlyClobberLR() local_unnamed_addr #0 {
 ; PPC64LE-LABEL: EarlyClobberLR:
 ; PPC64LE:       # %bb.0: # %entry
 ; PPC64LE-NEXT:    mflr r0
-; PPC64LE-NEXT:    std r0, 16(r1)
 ; PPC64LE-NEXT:    stdu r1, -32(r1)
+; PPC64LE-NEXT:    std r0, 48(r1)
 ; PPC64LE-NEXT:    #APP
 ; PPC64LE-NEXT:    #NO_APP
 ; PPC64LE-NEXT:    addi r1, r1, 32
@@ -207,8 +207,8 @@ define dso_local void @EarlyClobberLR() local_unnamed_addr #0 {
 ; PPC64BE-LABEL: EarlyClobberLR:
 ; PPC64BE:       # %bb.0: # %entry
 ; PPC64BE-NEXT:    mflr r0
-; PPC64BE-NEXT:    std r0, 16(r1)
 ; PPC64BE-NEXT:    stdu r1, -48(r1)
+; PPC64BE-NEXT:    std r0, 64(r1)
 ; PPC64BE-NEXT:    #APP
 ; PPC64BE-NEXT:    #NO_APP
 ; PPC64BE-NEXT:    addi r1, r1, 48
@@ -226,8 +226,8 @@ define dso_local void @ClobberMulti() local_unnamed_addr #0 {
 ; PPC64LE-NEXT:    mflr r0
 ; PPC64LE-NEXT:    std r15, -136(r1) # 8-byte Folded Spill
 ; PPC64LE-NEXT:    std r16, -128(r1) # 8-byte Folded Spill
-; PPC64LE-NEXT:    std r0, 16(r1)
 ; PPC64LE-NEXT:    stdu r1, -176(r1)
+; PPC64LE-NEXT:    std r0, 192(r1)
 ; PPC64LE-NEXT:    #APP
 ; PPC64LE-NEXT:    #NO_APP
 ; PPC64LE-NEXT:    addi r1, r1, 176
@@ -240,8 +240,8 @@ define dso_local void @ClobberMulti() local_unnamed_addr #0 {
 ; PPC64BE-LABEL: ClobberMulti:
 ; PPC64BE:       # %bb.0: # %entry
 ; PPC64BE-NEXT:    mflr r0
-; PPC64BE-NEXT:    std r0, 16(r1)
 ; PPC64BE-NEXT:    stdu r1, -192(r1)
+; PPC64BE-NEXT:    std r0, 208(r1)
 ; PPC64BE-NEXT:    std r15, 56(r1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std r16, 64(r1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    #APP

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-nest.ll b/llvm/test/CodeGen/PowerPC/ppc64-nest.ll
index 32b85f50f4a5c..cdaa32cfb4664 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-nest.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-nest.ll
@@ -17,6 +17,7 @@ define ptr @nest_receiver(ptr nest %arg) nounwind {
 define ptr @nest_caller(ptr %arg) nounwind {
 ; CHECK-LABEL: nest_caller:
 ; CHECK: mr 11, 3
+; CHECK: std 0, 128(1)
 ; CHECK-NEXT: bl nest_receiver
 ; CHECK: blr
 

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-notoc-rm-relocation.ll b/llvm/test/CodeGen/PowerPC/ppc64-notoc-rm-relocation.ll
index 635f472469508..0c9432e6484e6 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-notoc-rm-relocation.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-notoc-rm-relocation.ll
@@ -42,8 +42,8 @@ define dso_local signext i32 @main() #0 {
 ; LE-P9-LABEL: main:
 ; LE-P9:       # %bb.0: # %_main_entry
 ; LE-P9-NEXT:    mflr r0
-; LE-P9-NEXT:    std r0, 16(r1)
 ; LE-P9-NEXT:    stdu r1, -32(r1)
+; LE-P9-NEXT:    std r0, 48(r1)
 ; LE-P9-NEXT:    bl callee
 ; LE-P9-NEXT:    nop
 ; LE-P9-NEXT:    li r3, 0
@@ -55,8 +55,8 @@ define dso_local signext i32 @main() #0 {
 ; BE-P9-LABEL: main:
 ; BE-P9:       # %bb.0: # %_main_entry
 ; BE-P9-NEXT:    mflr r0
-; BE-P9-NEXT:    std r0, 16(r1)
 ; BE-P9-NEXT:    stdu r1, -112(r1)
+; BE-P9-NEXT:    std r0, 128(r1)
 ; BE-P9-NEXT:    bl callee
 ; BE-P9-NEXT:    nop
 ; BE-P9-NEXT:    li r3, 0

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll
index a56636ad1d225..56105426e2f68 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll
@@ -66,9 +66,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-P9-LABEL: caller:
 ; BE-P9:       # %bb.0: # %entry
 ; BE-P9-NEXT:    mflr r0
-; BE-P9-NEXT:    std r0, 16(r1)
-; BE-P9-NEXT:    hashst r0, -16(r1)
 ; BE-P9-NEXT:    stdu r1, -128(r1)
+; BE-P9-NEXT:    std r0, 144(r1)
+; BE-P9-NEXT:    hashst r0, -16(r1)
 ; BE-P9-NEXT:    std r31, 120(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    mr r31, r4
 ; BE-P9-NEXT:    bl .callee[PR]
@@ -85,9 +85,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-P8-LABEL: caller:
 ; BE-P8:       # %bb.0: # %entry
 ; BE-P8-NEXT:    mflr r0
-; BE-P8-NEXT:    std r0, 16(r1)
-; BE-P8-NEXT:    hashst r0, -16(r1)
 ; BE-P8-NEXT:    stdu r1, -128(r1)
+; BE-P8-NEXT:    std r0, 144(r1)
+; BE-P8-NEXT:    hashst r0, -16(r1)
 ; BE-P8-NEXT:    std r31, 120(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    mr r31, r4
 ; BE-P8-NEXT:    bl .callee[PR]
@@ -122,9 +122,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-32BIT-P9-LABEL: caller:
 ; BE-32BIT-P9:       # %bb.0: # %entry
 ; BE-32BIT-P9-NEXT:    mflr r0
-; BE-32BIT-P9-NEXT:    stw r0, 8(r1)
-; BE-32BIT-P9-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P9-NEXT:    stwu r1, -80(r1)
+; BE-32BIT-P9-NEXT:    stw r0, 88(r1)
+; BE-32BIT-P9-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P9-NEXT:    stw r31, 76(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    mr r31, r4
 ; BE-32BIT-P9-NEXT:    bl .callee[PR]
@@ -140,9 +140,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-32BIT-P8-LABEL: caller:
 ; BE-32BIT-P8:       # %bb.0: # %entry
 ; BE-32BIT-P8-NEXT:    mflr r0
-; BE-32BIT-P8-NEXT:    stw r0, 8(r1)
-; BE-32BIT-P8-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P8-NEXT:    stwu r1, -80(r1)
+; BE-32BIT-P8-NEXT:    stw r0, 88(r1)
+; BE-32BIT-P8-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P8-NEXT:    stw r31, 76(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    mr r31, r4
 ; BE-32BIT-P8-NEXT:    bl .callee[PR]
@@ -177,9 +177,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-P9-PRIV-LABEL: caller:
 ; BE-P9-PRIV:       # %bb.0: # %entry
 ; BE-P9-PRIV-NEXT:    mflr r0
-; BE-P9-PRIV-NEXT:    std r0, 16(r1)
-; BE-P9-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-P9-PRIV-NEXT:    stdu r1, -128(r1)
+; BE-P9-PRIV-NEXT:    std r0, 144(r1)
+; BE-P9-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-P9-PRIV-NEXT:    std r31, 120(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    mr r31, r4
 ; BE-P9-PRIV-NEXT:    bl .callee[PR]
@@ -196,9 +196,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-P8-PRIV-LABEL: caller:
 ; BE-P8-PRIV:       # %bb.0: # %entry
 ; BE-P8-PRIV-NEXT:    mflr r0
-; BE-P8-PRIV-NEXT:    std r0, 16(r1)
-; BE-P8-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-P8-PRIV-NEXT:    stdu r1, -128(r1)
+; BE-P8-PRIV-NEXT:    std r0, 144(r1)
+; BE-P8-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-P8-PRIV-NEXT:    std r31, 120(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    mr r31, r4
 ; BE-P8-PRIV-NEXT:    bl .callee[PR]
@@ -233,9 +233,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-32BIT-P9-PRIV-LABEL: caller:
 ; BE-32BIT-P9-PRIV:       # %bb.0: # %entry
 ; BE-32BIT-P9-PRIV-NEXT:    mflr r0
-; BE-32BIT-P9-PRIV-NEXT:    stw r0, 8(r1)
-; BE-32BIT-P9-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-32BIT-P9-PRIV-NEXT:    stwu r1, -80(r1)
+; BE-32BIT-P9-PRIV-NEXT:    stw r0, 88(r1)
+; BE-32BIT-P9-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-32BIT-P9-PRIV-NEXT:    stw r31, 76(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    mr r31, r4
 ; BE-32BIT-P9-PRIV-NEXT:    bl .callee[PR]
@@ -251,9 +251,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-32BIT-P8-PRIV-LABEL: caller:
 ; BE-32BIT-P8-PRIV:       # %bb.0: # %entry
 ; BE-32BIT-P8-PRIV-NEXT:    mflr r0
-; BE-32BIT-P8-PRIV-NEXT:    stw r0, 8(r1)
-; BE-32BIT-P8-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    stwu r1, -80(r1)
+; BE-32BIT-P8-PRIV-NEXT:    stw r0, 88(r1)
+; BE-32BIT-P8-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    stw r31, 76(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    mr r31, r4
 ; BE-32BIT-P8-PRIV-NEXT:    bl .callee[PR]
@@ -406,39 +406,39 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ;
 ; BE-P9-LABEL: spill:
 ; BE-P9:       # %bb.0: # %entry
-; BE-P9-NEXT:    mflr r0
 ; BE-P9-NEXT:    mfcr r12
-; BE-P9-NEXT:    std r0, 16(r1)
-; BE-P9-NEXT:    hashst r0, -488(r1)
+; BE-P9-NEXT:    mflr r0
 ; BE-P9-NEXT:    stw r12, 8(r1)
 ; BE-P9-NEXT:    stdu r1, -624(r1)
-; BE-P9-NEXT:    lwz r4, 12(r3)
+; BE-P9-NEXT:    std r0, 640(r1)
+; BE-P9-NEXT:    hashst r0, -488(r1)
 ; BE-P9-NEXT:    std r14, 336(r1) # 8-byte Folded Spill
-; BE-P9-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v20, 144(r1) # 16-byte Folded Spill
 ; BE-P9-NEXT:    stxv v21, 160(r1) # 16-byte Folded Spill
+; BE-P9-NEXT:    lwz r4, 12(r3)
+; BE-P9-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v22, 176(r1) # 16-byte Folded Spill
 ; BE-P9-NEXT:    std r16, 352(r1) # 8-byte Folded Spill
-; BE-P9-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v23, 192(r1) # 16-byte Folded Spill
-; BE-P9-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
+; BE-P9-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v24, 208(r1) # 16-byte Folded Spill
-; BE-P9-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
+; BE-P9-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v25, 224(r1) # 16-byte Folded Spill
+; BE-P9-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    std r20, 384(r1) # 8-byte Folded Spill
-; BE-P9-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v26, 240(r1) # 16-byte Folded Spill
-; BE-P9-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
+; BE-P9-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v27, 256(r1) # 16-byte Folded Spill
-; BE-P9-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
+; BE-P9-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v28, 272(r1) # 16-byte Folded Spill
+; BE-P9-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    std r24, 416(r1) # 8-byte Folded Spill
-; BE-P9-NEXT:    std r25, 424(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v29, 288(r1) # 16-byte Folded Spill
-; BE-P9-NEXT:    std r26, 432(r1) # 8-byte Folded Spill
+; BE-P9-NEXT:    std r25, 424(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v30, 304(r1) # 16-byte Folded Spill
-; BE-P9-NEXT:    std r27, 440(r1) # 8-byte Folded Spill
+; BE-P9-NEXT:    std r26, 432(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v31, 320(r1) # 16-byte Folded Spill
+; BE-P9-NEXT:    std r27, 440(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    std r28, 448(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    std r29, 456(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    std r30, 464(r1) # 8-byte Folded Spill
@@ -535,23 +535,23 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8:       # %bb.0: # %entry
 ; BE-P8-NEXT:    mfcr r12
 ; BE-P8-NEXT:    mflr r0
-; BE-P8-NEXT:    std r0, 16(r1)
-; BE-P8-NEXT:    hashst r0, -488(r1)
 ; BE-P8-NEXT:    stw r12, 8(r1)
 ; BE-P8-NEXT:    stdu r1, -624(r1)
 ; BE-P8-NEXT:    li r4, 144
+; BE-P8-NEXT:    std r0, 640(r1)
+; BE-P8-NEXT:    hashst r0, -488(r1)
 ; BE-P8-NEXT:    std r14, 336(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r16, 352(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 160
+; BE-P8-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r20, 384(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 176
+; BE-P8-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r24, 416(r1) # 8-byte Folded Spill
@@ -565,33 +565,33 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 192
+; BE-P8-NEXT:    stfd f14, 480(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 208
-; BE-P8-NEXT:    stfd f14, 480(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f15, 488(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 224
-; BE-P8-NEXT:    stfd f15, 488(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f16, 496(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 240
-; BE-P8-NEXT:    stfd f16, 496(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f17, 504(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 256
-; BE-P8-NEXT:    stfd f17, 504(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f18, 512(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 272
-; BE-P8-NEXT:    stfd f18, 512(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f19, 520(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 288
-; BE-P8-NEXT:    stfd f19, 520(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f20, 528(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 304
-; BE-P8-NEXT:    stfd f20, 528(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f21, 536(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 320
-; BE-P8-NEXT:    stfd f21, 536(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f22, 544(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    lwz r4, 12(r3)
-; BE-P8-NEXT:    stfd f22, 544(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stfd f23, 552(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stfd f24, 560(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stfd f25, 568(r1) # 8-byte Folded Spill
@@ -812,39 +812,39 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ;
 ; BE-32BIT-P9-LABEL: spill:
 ; BE-32BIT-P9:       # %bb.0: # %entry
-; BE-32BIT-P9-NEXT:    mflr r0
 ; BE-32BIT-P9-NEXT:    mfcr r12
-; BE-32BIT-P9-NEXT:    stw r0, 8(r1)
-; BE-32BIT-P9-NEXT:    hashst r0, -424(r1)
+; BE-32BIT-P9-NEXT:    mflr r0
 ; BE-32BIT-P9-NEXT:    stw r12, 4(r1)
 ; BE-32BIT-P9-NEXT:    stwu r1, -496(r1)
-; BE-32BIT-P9-NEXT:    lwz r4, 12(r3)
+; BE-32BIT-P9-NEXT:    stw r0, 504(r1)
+; BE-32BIT-P9-NEXT:    hashst r0, -424(r1)
 ; BE-32BIT-P9-NEXT:    stw r13, 276(r1) # 4-byte Folded Spill
-; BE-32BIT-P9-NEXT:    stw r14, 280(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stxv v20, 80(r1) # 16-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stxv v21, 96(r1) # 16-byte Folded Spill
+; BE-32BIT-P9-NEXT:    lwz r4, 12(r3)
+; BE-32BIT-P9-NEXT:    stw r14, 280(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stxv v22, 112(r1) # 16-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stw r15, 284(r1) # 4-byte Folded Spill
-; BE-32BIT-P9-NEXT:    stw r16, 288(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stxv v23, 128(r1) # 16-byte Folded Spill
-; BE-32BIT-P9-NEXT:    stw r17, 292(r1) # 4-byte Folded Spill
+; BE-32BIT-P9-NEXT:    stw r16, 288(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stxv v24, 144(r1) # 16-byte Folded Spill
-; BE-32BIT-P9-NEXT:    stw r18, 296(r1) # 4-byte Folded Spill
+; BE-32BIT-P9-NEXT:    stw r17, 292(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stxv v25, 160(r1) # 16-byte Folded Spill
+; BE-32BIT-P9-NEXT:    stw r18, 296(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stw r19, 300(r1) # 4-byte Folded Spill
-; BE-32BIT-P9-NEXT:    stw r20, 304(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stxv v26, 176(r1) # 16-byte Folded Spill
-; BE-32BIT-P9-NEXT:    stw r21, 308(r1) # 4-byte Folded Spill
+; BE-32BIT-P9-NEXT:    stw r20, 304(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stxv v27, 192(r1) # 16-byte Folded Spill
-; BE-32BIT-P9-NEXT:    stw r22, 312(r1) # 4-byte Folded Spill
+; BE-32BIT-P9-NEXT:    stw r21, 308(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stxv v28, 208(r1) # 16-byte Folded Spill
+; BE-32BIT-P9-NEXT:    stw r22, 312(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stw r23, 316(r1) # 4-byte Folded Spill
-; BE-32BIT-P9-NEXT:    stw r24, 320(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stxv v29, 224(r1) # 16-byte Folded Spill
-; BE-32BIT-P9-NEXT:    stw r25, 324(r1) # 4-byte Folded Spill
+; BE-32BIT-P9-NEXT:    stw r24, 320(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stxv v30, 240(r1) # 16-byte Folded Spill
-; BE-32BIT-P9-NEXT:    stw r26, 328(r1) # 4-byte Folded Spill
+; BE-32BIT-P9-NEXT:    stw r25, 324(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stxv v31, 256(r1) # 16-byte Folded Spill
+; BE-32BIT-P9-NEXT:    stw r26, 328(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stw r27, 332(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stw r28, 336(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stw r29, 340(r1) # 4-byte Folded Spill
@@ -942,48 +942,48 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8:       # %bb.0: # %entry
 ; BE-32BIT-P8-NEXT:    mfcr r12
 ; BE-32BIT-P8-NEXT:    mflr r0
-; BE-32BIT-P8-NEXT:    stw r0, 8(r1)
-; BE-32BIT-P8-NEXT:    hashst r0, -424(r1)
 ; BE-32BIT-P8-NEXT:    stw r12, 4(r1)
 ; BE-32BIT-P8-NEXT:    stwu r1, -496(r1)
 ; BE-32BIT-P8-NEXT:    li r4, 80
+; BE-32BIT-P8-NEXT:    stw r0, 504(r1)
+; BE-32BIT-P8-NEXT:    hashst r0, -424(r1)
 ; BE-32BIT-P8-NEXT:    stw r13, 276(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-NEXT:    stw r14, 280(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 96
-; BE-32BIT-P8-NEXT:    stw r15, 284(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stw r14, 280(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 112
-; BE-32BIT-P8-NEXT:    stw r16, 288(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stw r15, 284(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 128
-; BE-32BIT-P8-NEXT:    stw r17, 292(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stw r16, 288(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 144
-; BE-32BIT-P8-NEXT:    stw r18, 296(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stw r17, 292(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 160
-; BE-32BIT-P8-NEXT:    stw r19, 300(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stw r18, 296(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 176
-; BE-32BIT-P8-NEXT:    stw r20, 304(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stw r19, 300(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 192
-; BE-32BIT-P8-NEXT:    stw r21, 308(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stw r20, 304(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 208
-; BE-32BIT-P8-NEXT:    stw r22, 312(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stw r21, 308(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 224
-; BE-32BIT-P8-NEXT:    stw r23, 316(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stw r22, 312(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 240
-; BE-32BIT-P8-NEXT:    stw r24, 320(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stw r23, 316(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 256
-; BE-32BIT-P8-NEXT:    stw r25, 324(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stw r24, 320(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    lwz r4, 12(r3)
+; BE-32BIT-P8-NEXT:    stw r25, 324(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stw r26, 328(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stw r27, 332(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stw r28, 336(r1) # 4-byte Folded Spill
@@ -1219,39 +1219,39 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ;
 ; BE-P9-PRIV-LABEL: spill:
 ; BE-P9-PRIV:       # %bb.0: # %entry
-; BE-P9-PRIV-NEXT:    mflr r0
 ; BE-P9-PRIV-NEXT:    mfcr r12
-; BE-P9-PRIV-NEXT:    std r0, 16(r1)
-; BE-P9-PRIV-NEXT:    hashstp r0, -488(r1)
+; BE-P9-PRIV-NEXT:    mflr r0
 ; BE-P9-PRIV-NEXT:    stw r12, 8(r1)
 ; BE-P9-PRIV-NEXT:    stdu r1, -624(r1)
-; BE-P9-PRIV-NEXT:    lwz r4, 12(r3)
+; BE-P9-PRIV-NEXT:    std r0, 640(r1)
+; BE-P9-PRIV-NEXT:    hashstp r0, -488(r1)
 ; BE-P9-PRIV-NEXT:    std r14, 336(r1) # 8-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v20, 144(r1) # 16-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v21, 160(r1) # 16-byte Folded Spill
+; BE-P9-PRIV-NEXT:    lwz r4, 12(r3)
+; BE-P9-PRIV-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v22, 176(r1) # 16-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    std r16, 352(r1) # 8-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v23, 192(r1) # 16-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v24, 208(r1) # 16-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v25, 224(r1) # 16-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    std r20, 384(r1) # 8-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v26, 240(r1) # 16-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v27, 256(r1) # 16-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v28, 272(r1) # 16-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    std r24, 416(r1) # 8-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r25, 424(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v29, 288(r1) # 16-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r26, 432(r1) # 8-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r25, 424(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v30, 304(r1) # 16-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r27, 440(r1) # 8-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r26, 432(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v31, 320(r1) # 16-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r27, 440(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    std r28, 448(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    std r29, 456(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    std r30, 464(r1) # 8-byte Folded Spill
@@ -1348,23 +1348,23 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-PRIV:       # %bb.0: # %entry
 ; BE-P8-PRIV-NEXT:    mfcr r12
 ; BE-P8-PRIV-NEXT:    mflr r0
-; BE-P8-PRIV-NEXT:    std r0, 16(r1)
-; BE-P8-PRIV-NEXT:    hashstp r0, -488(r1)
 ; BE-P8-PRIV-NEXT:    stw r12, 8(r1)
 ; BE-P8-PRIV-NEXT:    stdu r1, -624(r1)
 ; BE-P8-PRIV-NEXT:    li r4, 144
+; BE-P8-PRIV-NEXT:    std r0, 640(r1)
+; BE-P8-PRIV-NEXT:    hashstp r0, -488(r1)
 ; BE-P8-PRIV-NEXT:    std r14, 336(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r16, 352(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 160
+; BE-P8-PRIV-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r20, 384(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 176
+; BE-P8-PRIV-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r24, 416(r1) # 8-byte Folded Spill
@@ -1378,33 +1378,33 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-PRIV-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 192
+; BE-P8-PRIV-NEXT:    stfd f14, 480(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 208
-; BE-P8-PRIV-NEXT:    stfd f14, 480(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f15, 488(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 224
-; BE-P8-PRIV-NEXT:    stfd f15, 488(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f16, 496(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 240
-; BE-P8-PRIV-NEXT:    stfd f16, 496(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f17, 504(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 256
-; BE-P8-PRIV-NEXT:    stfd f17, 504(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f18, 512(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 272
-; BE-P8-PRIV-NEXT:    stfd f18, 512(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f19, 520(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 288
-; BE-P8-PRIV-NEXT:    stfd f19, 520(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f20, 528(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 304
-; BE-P8-PRIV-NEXT:    stfd f20, 528(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f21, 536(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 320
-; BE-P8-PRIV-NEXT:    stfd f21, 536(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f22, 544(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    lwz r4, 12(r3)
-; BE-P8-PRIV-NEXT:    stfd f22, 544(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stfd f23, 552(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stfd f24, 560(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stfd f25, 568(r1) # 8-byte Folded Spill
@@ -1625,39 +1625,39 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ;
 ; BE-32BIT-P9-PRIV-LABEL: spill:
 ; BE-32BIT-P9-PRIV:       # %bb.0: # %entry
-; BE-32BIT-P9-PRIV-NEXT:    mflr r0
 ; BE-32BIT-P9-PRIV-NEXT:    mfcr r12
-; BE-32BIT-P9-PRIV-NEXT:    stw r0, 8(r1)
-; BE-32BIT-P9-PRIV-NEXT:    hashstp r0, -424(r1)
+; BE-32BIT-P9-PRIV-NEXT:    mflr r0
 ; BE-32BIT-P9-PRIV-NEXT:    stw r12, 4(r1)
 ; BE-32BIT-P9-PRIV-NEXT:    stwu r1, -496(r1)
-; BE-32BIT-P9-PRIV-NEXT:    lwz r4, 12(r3)
+; BE-32BIT-P9-PRIV-NEXT:    stw r0, 504(r1)
+; BE-32BIT-P9-PRIV-NEXT:    hashstp r0, -424(r1)
 ; BE-32BIT-P9-PRIV-NEXT:    stw r13, 276(r1) # 4-byte Folded Spill
-; BE-32BIT-P9-PRIV-NEXT:    stw r14, 280(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stxv v20, 80(r1) # 16-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stxv v21, 96(r1) # 16-byte Folded Spill
+; BE-32BIT-P9-PRIV-NEXT:    lwz r4, 12(r3)
+; BE-32BIT-P9-PRIV-NEXT:    stw r14, 280(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stxv v22, 112(r1) # 16-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stw r15, 284(r1) # 4-byte Folded Spill
-; BE-32BIT-P9-PRIV-NEXT:    stw r16, 288(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stxv v23, 128(r1) # 16-byte Folded Spill
-; BE-32BIT-P9-PRIV-NEXT:    stw r17, 292(r1) # 4-byte Folded Spill
+; BE-32BIT-P9-PRIV-NEXT:    stw r16, 288(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stxv v24, 144(r1) # 16-byte Folded Spill
-; BE-32BIT-P9-PRIV-NEXT:    stw r18, 296(r1) # 4-byte Folded Spill
+; BE-32BIT-P9-PRIV-NEXT:    stw r17, 292(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stxv v25, 160(r1) # 16-byte Folded Spill
+; BE-32BIT-P9-PRIV-NEXT:    stw r18, 296(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stw r19, 300(r1) # 4-byte Folded Spill
-; BE-32BIT-P9-PRIV-NEXT:    stw r20, 304(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stxv v26, 176(r1) # 16-byte Folded Spill
-; BE-32BIT-P9-PRIV-NEXT:    stw r21, 308(r1) # 4-byte Folded Spill
+; BE-32BIT-P9-PRIV-NEXT:    stw r20, 304(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stxv v27, 192(r1) # 16-byte Folded Spill
-; BE-32BIT-P9-PRIV-NEXT:    stw r22, 312(r1) # 4-byte Folded Spill
+; BE-32BIT-P9-PRIV-NEXT:    stw r21, 308(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stxv v28, 208(r1) # 16-byte Folded Spill
+; BE-32BIT-P9-PRIV-NEXT:    stw r22, 312(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stw r23, 316(r1) # 4-byte Folded Spill
-; BE-32BIT-P9-PRIV-NEXT:    stw r24, 320(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stxv v29, 224(r1) # 16-byte Folded Spill
-; BE-32BIT-P9-PRIV-NEXT:    stw r25, 324(r1) # 4-byte Folded Spill
+; BE-32BIT-P9-PRIV-NEXT:    stw r24, 320(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stxv v30, 240(r1) # 16-byte Folded Spill
-; BE-32BIT-P9-PRIV-NEXT:    stw r26, 328(r1) # 4-byte Folded Spill
+; BE-32BIT-P9-PRIV-NEXT:    stw r25, 324(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stxv v31, 256(r1) # 16-byte Folded Spill
+; BE-32BIT-P9-PRIV-NEXT:    stw r26, 328(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stw r27, 332(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stw r28, 336(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    stw r29, 340(r1) # 4-byte Folded Spill
@@ -1755,48 +1755,48 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8-PRIV:       # %bb.0: # %entry
 ; BE-32BIT-P8-PRIV-NEXT:    mfcr r12
 ; BE-32BIT-P8-PRIV-NEXT:    mflr r0
-; BE-32BIT-P8-PRIV-NEXT:    stw r0, 8(r1)
-; BE-32BIT-P8-PRIV-NEXT:    hashstp r0, -424(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    stw r12, 4(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    stwu r1, -496(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 80
+; BE-32BIT-P8-PRIV-NEXT:    stw r0, 504(r1)
+; BE-32BIT-P8-PRIV-NEXT:    hashstp r0, -424(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    stw r13, 276(r1) # 4-byte Folded Spill
-; BE-32BIT-P8-PRIV-NEXT:    stw r14, 280(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 96
-; BE-32BIT-P8-PRIV-NEXT:    stw r15, 284(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    stw r14, 280(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 112
-; BE-32BIT-P8-PRIV-NEXT:    stw r16, 288(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    stw r15, 284(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 128
-; BE-32BIT-P8-PRIV-NEXT:    stw r17, 292(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    stw r16, 288(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 144
-; BE-32BIT-P8-PRIV-NEXT:    stw r18, 296(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    stw r17, 292(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 160
-; BE-32BIT-P8-PRIV-NEXT:    stw r19, 300(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    stw r18, 296(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 176
-; BE-32BIT-P8-PRIV-NEXT:    stw r20, 304(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    stw r19, 300(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 192
-; BE-32BIT-P8-PRIV-NEXT:    stw r21, 308(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    stw r20, 304(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 208
-; BE-32BIT-P8-PRIV-NEXT:    stw r22, 312(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    stw r21, 308(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 224
-; BE-32BIT-P8-PRIV-NEXT:    stw r23, 316(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    stw r22, 312(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 240
-; BE-32BIT-P8-PRIV-NEXT:    stw r24, 320(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    stw r23, 316(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    li r4, 256
-; BE-32BIT-P8-PRIV-NEXT:    stw r25, 324(r1) # 4-byte Folded Spill
+; BE-32BIT-P8-PRIV-NEXT:    stw r24, 320(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r4, 12(r3)
+; BE-32BIT-P8-PRIV-NEXT:    stw r25, 324(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stw r26, 328(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stw r27, 332(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    stw r28, 336(r1) # 4-byte Folded Spill
@@ -1953,9 +1953,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P9-NEXT:    beq cr0, L..BB2_2
 ; BE-P9-NEXT:  # %bb.1: # %if.end
 ; BE-P9-NEXT:    mflr r0
-; BE-P9-NEXT:    std r0, 16(r1)
-; BE-P9-NEXT:    hashst r0, -16(r1)
 ; BE-P9-NEXT:    stdu r1, -144(r1)
+; BE-P9-NEXT:    std r0, 160(r1)
+; BE-P9-NEXT:    hashst r0, -16(r1)
 ; BE-P9-NEXT:    std r31, 136(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    mr r31, r3
 ; BE-P9-NEXT:    lwz r3, 12(r3)
@@ -1982,9 +1982,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P8-NEXT:    beq cr0, L..BB2_2
 ; BE-P8-NEXT:  # %bb.1: # %if.end
 ; BE-P8-NEXT:    mflr r0
-; BE-P8-NEXT:    std r0, 16(r1)
-; BE-P8-NEXT:    hashst r0, -16(r1)
 ; BE-P8-NEXT:    stdu r1, -144(r1)
+; BE-P8-NEXT:    std r0, 160(r1)
+; BE-P8-NEXT:    hashst r0, -16(r1)
 ; BE-P8-NEXT:    std r31, 136(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    mr r31, r3
 ; BE-P8-NEXT:    lwz r3, 12(r3)
@@ -2039,9 +2039,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P9-NEXT:    beq cr0, L..BB2_2
 ; BE-32BIT-P9-NEXT:  # %bb.1: # %if.end
 ; BE-32BIT-P9-NEXT:    mflr r0
-; BE-32BIT-P9-NEXT:    stw r0, 8(r1)
-; BE-32BIT-P9-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P9-NEXT:    stwu r1, -80(r1)
+; BE-32BIT-P9-NEXT:    stw r0, 88(r1)
+; BE-32BIT-P9-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P9-NEXT:    stw r31, 76(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    mr r31, r3
 ; BE-32BIT-P9-NEXT:    lwz r3, 12(r3)
@@ -2067,9 +2067,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P8-NEXT:    beq cr0, L..BB2_2
 ; BE-32BIT-P8-NEXT:  # %bb.1: # %if.end
 ; BE-32BIT-P8-NEXT:    mflr r0
-; BE-32BIT-P8-NEXT:    stw r0, 8(r1)
-; BE-32BIT-P8-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P8-NEXT:    stwu r1, -80(r1)
+; BE-32BIT-P8-NEXT:    stw r0, 88(r1)
+; BE-32BIT-P8-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P8-NEXT:    stw r31, 76(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    mr r31, r3
 ; BE-32BIT-P8-NEXT:    lwz r3, 12(r3)
@@ -2124,9 +2124,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P9-PRIV-NEXT:    beq cr0, L..BB2_2
 ; BE-P9-PRIV-NEXT:  # %bb.1: # %if.end
 ; BE-P9-PRIV-NEXT:    mflr r0
-; BE-P9-PRIV-NEXT:    std r0, 16(r1)
-; BE-P9-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-P9-PRIV-NEXT:    stdu r1, -144(r1)
+; BE-P9-PRIV-NEXT:    std r0, 160(r1)
+; BE-P9-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-P9-PRIV-NEXT:    std r31, 136(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    mr r31, r3
 ; BE-P9-PRIV-NEXT:    lwz r3, 12(r3)
@@ -2153,9 +2153,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P8-PRIV-NEXT:    beq cr0, L..BB2_2
 ; BE-P8-PRIV-NEXT:  # %bb.1: # %if.end
 ; BE-P8-PRIV-NEXT:    mflr r0
-; BE-P8-PRIV-NEXT:    std r0, 16(r1)
-; BE-P8-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-P8-PRIV-NEXT:    stdu r1, -144(r1)
+; BE-P8-PRIV-NEXT:    std r0, 160(r1)
+; BE-P8-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-P8-PRIV-NEXT:    std r31, 136(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    mr r31, r3
 ; BE-P8-PRIV-NEXT:    lwz r3, 12(r3)
@@ -2210,9 +2210,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P9-PRIV-NEXT:    beq cr0, L..BB2_2
 ; BE-32BIT-P9-PRIV-NEXT:  # %bb.1: # %if.end
 ; BE-32BIT-P9-PRIV-NEXT:    mflr r0
-; BE-32BIT-P9-PRIV-NEXT:    stw r0, 8(r1)
-; BE-32BIT-P9-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-32BIT-P9-PRIV-NEXT:    stwu r1, -80(r1)
+; BE-32BIT-P9-PRIV-NEXT:    stw r0, 88(r1)
+; BE-32BIT-P9-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-32BIT-P9-PRIV-NEXT:    stw r31, 76(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-PRIV-NEXT:    mr r31, r3
 ; BE-32BIT-P9-PRIV-NEXT:    lwz r3, 12(r3)
@@ -2238,9 +2238,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P8-PRIV-NEXT:    beq cr0, L..BB2_2
 ; BE-32BIT-P8-PRIV-NEXT:  # %bb.1: # %if.end
 ; BE-32BIT-P8-PRIV-NEXT:    mflr r0
-; BE-32BIT-P8-PRIV-NEXT:    stw r0, 8(r1)
-; BE-32BIT-P8-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    stwu r1, -80(r1)
+; BE-32BIT-P8-PRIV-NEXT:    stw r0, 88(r1)
+; BE-32BIT-P8-PRIV-NEXT:    hashstp r0, -16(r1)
 ; BE-32BIT-P8-PRIV-NEXT:    stw r31, 76(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-PRIV-NEXT:    mr r31, r3
 ; BE-32BIT-P8-PRIV-NEXT:    lwz r3, 12(r3)

diff  --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
index bf3020732ee9b..46b16728d9259 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
@@ -84,9 +84,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; LE-P9:       # %bb.0: # %entry
 ; LE-P9-NEXT:    mflr r0
 ; LE-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; LE-P9-NEXT:    std r0, 16(r1)
-; LE-P9-NEXT:    hashst r0, -24(r1)
 ; LE-P9-NEXT:    stdu r1, -64(r1)
+; LE-P9-NEXT:    std r0, 80(r1)
+; LE-P9-NEXT:    hashst r0, -24(r1)
 ; LE-P9-NEXT:    mr r30, r4
 ; LE-P9-NEXT:    bl callee
 ; LE-P9-NEXT:    nop
@@ -103,9 +103,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; LE-P8:       # %bb.0: # %entry
 ; LE-P8-NEXT:    mflr r0
 ; LE-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; LE-P8-NEXT:    std r0, 16(r1)
-; LE-P8-NEXT:    hashst r0, -24(r1)
 ; LE-P8-NEXT:    stdu r1, -64(r1)
+; LE-P8-NEXT:    std r0, 80(r1)
+; LE-P8-NEXT:    hashst r0, -24(r1)
 ; LE-P8-NEXT:    mr r30, r4
 ; LE-P8-NEXT:    bl callee
 ; LE-P8-NEXT:    nop
@@ -144,9 +144,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; LE-P9-O0-LABEL: caller:
 ; LE-P9-O0:       # %bb.0: # %entry
 ; LE-P9-O0-NEXT:    mflr r0
-; LE-P9-O0-NEXT:    std r0, 16(r1)
-; LE-P9-O0-NEXT:    hashst r0, -8(r1)
 ; LE-P9-O0-NEXT:    stdu r1, -112(r1)
+; LE-P9-O0-NEXT:    std r0, 128(r1)
+; LE-P9-O0-NEXT:    hashst r0, -8(r1)
 ; LE-P9-O0-NEXT:    # kill: def $r4 killed $r4 killed $x4
 ; LE-P9-O0-NEXT:    stw r4, 100(r1) # 4-byte Folded Spill
 ; LE-P9-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
@@ -165,9 +165,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; LE-P8-O0-LABEL: caller:
 ; LE-P8-O0:       # %bb.0: # %entry
 ; LE-P8-O0-NEXT:    mflr r0
-; LE-P8-O0-NEXT:    std r0, 16(r1)
-; LE-P8-O0-NEXT:    hashst r0, -8(r1)
 ; LE-P8-O0-NEXT:    stdu r1, -112(r1)
+; LE-P8-O0-NEXT:    std r0, 128(r1)
+; LE-P8-O0-NEXT:    hashst r0, -8(r1)
 ; LE-P8-O0-NEXT:    # kill: def $r4 killed $r4 killed $x4
 ; LE-P8-O0-NEXT:    stw r4, 100(r1) # 4-byte Folded Spill
 ; LE-P8-O0-NEXT:    # kill: def $r3 killed $r3 killed $x3
@@ -205,9 +205,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-P9-LABEL: caller:
 ; BE-P9:       # %bb.0: # %entry
 ; BE-P9-NEXT:    mflr r0
-; BE-P9-NEXT:    std r0, 16(r1)
-; BE-P9-NEXT:    hashst r0, -24(r1)
 ; BE-P9-NEXT:    stdu r1, -144(r1)
+; BE-P9-NEXT:    std r0, 160(r1)
+; BE-P9-NEXT:    hashst r0, -24(r1)
 ; BE-P9-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    mr r30, r4
 ; BE-P9-NEXT:    bl callee
@@ -224,9 +224,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-P8-LABEL: caller:
 ; BE-P8:       # %bb.0: # %entry
 ; BE-P8-NEXT:    mflr r0
-; BE-P8-NEXT:    std r0, 16(r1)
-; BE-P8-NEXT:    hashst r0, -24(r1)
 ; BE-P8-NEXT:    stdu r1, -144(r1)
+; BE-P8-NEXT:    std r0, 160(r1)
+; BE-P8-NEXT:    hashst r0, -24(r1)
 ; BE-P8-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    mr r30, r4
 ; BE-P8-NEXT:    bl callee
@@ -260,9 +260,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-32BIT-P9-LABEL: caller:
 ; BE-32BIT-P9:       # %bb.0: # %entry
 ; BE-32BIT-P9-NEXT:    mflr r0
-; BE-32BIT-P9-NEXT:    stw r0, 4(r1)
-; BE-32BIT-P9-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P9-NEXT:    stwu r1, -32(r1)
+; BE-32BIT-P9-NEXT:    stw r0, 36(r1)
+; BE-32BIT-P9-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P9-NEXT:    stw r30, 24(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    mr r30, r4
 ; BE-32BIT-P9-NEXT:    bl callee
@@ -277,9 +277,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-32BIT-P8-LABEL: caller:
 ; BE-32BIT-P8:       # %bb.0: # %entry
 ; BE-32BIT-P8-NEXT:    mflr r0
-; BE-32BIT-P8-NEXT:    stw r0, 4(r1)
-; BE-32BIT-P8-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P8-NEXT:    stwu r1, -32(r1)
+; BE-32BIT-P8-NEXT:    stw r0, 36(r1)
+; BE-32BIT-P8-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P8-NEXT:    stw r30, 24(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    mr r30, r4
 ; BE-32BIT-P8-NEXT:    bl callee
@@ -313,9 +313,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; LE-P9-PRIV:       # %bb.0: # %entry
 ; LE-P9-PRIV-NEXT:    mflr r0
 ; LE-P9-PRIV-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; LE-P9-PRIV-NEXT:    std r0, 16(r1)
-; LE-P9-PRIV-NEXT:    hashstp r0, -24(r1)
 ; LE-P9-PRIV-NEXT:    stdu r1, -64(r1)
+; LE-P9-PRIV-NEXT:    std r0, 80(r1)
+; LE-P9-PRIV-NEXT:    hashstp r0, -24(r1)
 ; LE-P9-PRIV-NEXT:    mr r30, r4
 ; LE-P9-PRIV-NEXT:    bl callee
 ; LE-P9-PRIV-NEXT:    nop
@@ -332,9 +332,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; LE-P8-PRIV:       # %bb.0: # %entry
 ; LE-P8-PRIV-NEXT:    mflr r0
 ; LE-P8-PRIV-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; LE-P8-PRIV-NEXT:    std r0, 16(r1)
-; LE-P8-PRIV-NEXT:    hashstp r0, -24(r1)
 ; LE-P8-PRIV-NEXT:    stdu r1, -64(r1)
+; LE-P8-PRIV-NEXT:    std r0, 80(r1)
+; LE-P8-PRIV-NEXT:    hashstp r0, -24(r1)
 ; LE-P8-PRIV-NEXT:    mr r30, r4
 ; LE-P8-PRIV-NEXT:    bl callee
 ; LE-P8-PRIV-NEXT:    nop
@@ -369,9 +369,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-P9-PRIV-LABEL: caller:
 ; BE-P9-PRIV:       # %bb.0: # %entry
 ; BE-P9-PRIV-NEXT:    mflr r0
-; BE-P9-PRIV-NEXT:    std r0, 16(r1)
-; BE-P9-PRIV-NEXT:    hashstp r0, -24(r1)
 ; BE-P9-PRIV-NEXT:    stdu r1, -144(r1)
+; BE-P9-PRIV-NEXT:    std r0, 160(r1)
+; BE-P9-PRIV-NEXT:    hashstp r0, -24(r1)
 ; BE-P9-PRIV-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    mr r30, r4
 ; BE-P9-PRIV-NEXT:    bl callee
@@ -388,9 +388,9 @@ define dso_local zeroext i32 @caller(i32 zeroext %in, i32 zeroext %add_after) #0
 ; BE-P8-PRIV-LABEL: caller:
 ; BE-P8-PRIV:       # %bb.0: # %entry
 ; BE-P8-PRIV-NEXT:    mflr r0
-; BE-P8-PRIV-NEXT:    std r0, 16(r1)
-; BE-P8-PRIV-NEXT:    hashstp r0, -24(r1)
 ; BE-P8-PRIV-NEXT:    stdu r1, -144(r1)
+; BE-P8-PRIV-NEXT:    std r0, 160(r1)
+; BE-P8-PRIV-NEXT:    hashstp r0, -24(r1)
 ; BE-P8-PRIV-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    mr r30, r4
 ; BE-P8-PRIV-NEXT:    bl callee
@@ -542,39 +542,39 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ;
 ; LE-P9-LABEL: spill:
 ; LE-P9:       # %bb.0: # %entry
-; LE-P9-NEXT:    mflr r0
 ; LE-P9-NEXT:    mfcr r12
-; LE-P9-NEXT:    std r0, 16(r1)
-; LE-P9-NEXT:    hashst r0, -488(r1)
+; LE-P9-NEXT:    mflr r0
 ; LE-P9-NEXT:    stw r12, 8(r1)
 ; LE-P9-NEXT:    stdu r1, -544(r1)
-; LE-P9-NEXT:    lwz r4, 12(r3)
+; LE-P9-NEXT:    std r0, 560(r1)
+; LE-P9-NEXT:    hashst r0, -488(r1)
 ; LE-P9-NEXT:    std r14, 256(r1) # 8-byte Folded Spill
-; LE-P9-NEXT:    std r15, 264(r1) # 8-byte Folded Spill
 ; LE-P9-NEXT:    stxv v20, 64(r1) # 16-byte Folded Spill
 ; LE-P9-NEXT:    stxv v21, 80(r1) # 16-byte Folded Spill
+; LE-P9-NEXT:    lwz r4, 12(r3)
+; LE-P9-NEXT:    std r15, 264(r1) # 8-byte Folded Spill
 ; LE-P9-NEXT:    stxv v22, 96(r1) # 16-byte Folded Spill
 ; LE-P9-NEXT:    std r16, 272(r1) # 8-byte Folded Spill
-; LE-P9-NEXT:    std r17, 280(r1) # 8-byte Folded Spill
 ; LE-P9-NEXT:    stxv v23, 112(r1) # 16-byte Folded Spill
-; LE-P9-NEXT:    std r18, 288(r1) # 8-byte Folded Spill
+; LE-P9-NEXT:    std r17, 280(r1) # 8-byte Folded Spill
 ; LE-P9-NEXT:    stxv v24, 128(r1) # 16-byte Folded Spill
-; LE-P9-NEXT:    std r19, 296(r1) # 8-byte Folded Spill
+; LE-P9-NEXT:    std r18, 288(r1) # 8-byte Folded Spill
 ; LE-P9-NEXT:    stxv v25, 144(r1) # 16-byte Folded Spill
+; LE-P9-NEXT:    std r19, 296(r1) # 8-byte Folded Spill
 ; LE-P9-NEXT:    std r20, 304(r1) # 8-byte Folded Spill
-; LE-P9-NEXT:    std r21, 312(r1) # 8-byte Folded Spill
 ; LE-P9-NEXT:    stxv v26, 160(r1) # 16-byte Folded Spill
-; LE-P9-NEXT:    std r22, 320(r1) # 8-byte Folded Spill
+; LE-P9-NEXT:    std r21, 312(r1) # 8-byte Folded Spill
 ; LE-P9-NEXT:    stxv v27, 176(r1) # 16-byte Folded Spill
-; LE-P9-NEXT:    std r23, 328(r1) # 8-byte Folded Spill
+; LE-P9-NEXT:    std r22, 320(r1) # 8-byte Folded Spill
 ; LE-P9-NEXT:    stxv v28, 192(r1) # 16-byte Folded Spill
+; LE-P9-NEXT:    std r23, 328(r1) # 8-byte Folded Spill
 ; LE-P9-NEXT:    std r24, 336(r1) # 8-byte Folded Spill
-; LE-P9-NEXT:    std r25, 344(r1) # 8-byte Folded Spill
 ; LE-P9-NEXT:    stxv v29, 208(r1) # 16-byte Folded Spill
-; LE-P9-NEXT:    std r26, 352(r1) # 8-byte Folded Spill
+; LE-P9-NEXT:    std r25, 344(r1) # 8-byte Folded Spill
 ; LE-P9-NEXT:    stxv v30, 224(r1) # 16-byte Folded Spill
-; LE-P9-NEXT:    std r27, 360(r1) # 8-byte Folded Spill
+; LE-P9-NEXT:    std r26, 352(r1) # 8-byte Folded Spill
 ; LE-P9-NEXT:    stxv v31, 240(r1) # 16-byte Folded Spill
+; LE-P9-NEXT:    std r27, 360(r1) # 8-byte Folded Spill
 ; LE-P9-NEXT:    std r28, 368(r1) # 8-byte Folded Spill
 ; LE-P9-NEXT:    std r29, 376(r1) # 8-byte Folded Spill
 ; LE-P9-NEXT:    std r30, 384(r1) # 8-byte Folded Spill
@@ -671,23 +671,23 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; LE-P8:       # %bb.0: # %entry
 ; LE-P8-NEXT:    mfcr r12
 ; LE-P8-NEXT:    mflr r0
-; LE-P8-NEXT:    std r0, 16(r1)
-; LE-P8-NEXT:    hashst r0, -488(r1)
 ; LE-P8-NEXT:    stw r12, 8(r1)
 ; LE-P8-NEXT:    stdu r1, -544(r1)
 ; LE-P8-NEXT:    li r4, 64
+; LE-P8-NEXT:    std r0, 560(r1)
+; LE-P8-NEXT:    hashst r0, -488(r1)
 ; LE-P8-NEXT:    std r14, 256(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r15, 264(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r16, 272(r1) # 8-byte Folded Spill
-; LE-P8-NEXT:    std r17, 280(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 80
+; LE-P8-NEXT:    std r17, 280(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r18, 288(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r19, 296(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r20, 304(r1) # 8-byte Folded Spill
-; LE-P8-NEXT:    std r21, 312(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 96
+; LE-P8-NEXT:    std r21, 312(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r22, 320(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r23, 328(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    std r24, 336(r1) # 8-byte Folded Spill
@@ -701,33 +701,33 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; LE-P8-NEXT:    std r3, 40(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 112
+; LE-P8-NEXT:    stfd f14, 400(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 128
-; LE-P8-NEXT:    stfd f14, 400(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f15, 408(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 144
-; LE-P8-NEXT:    stfd f15, 408(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f16, 416(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 160
-; LE-P8-NEXT:    stfd f16, 416(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f17, 424(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 176
-; LE-P8-NEXT:    stfd f17, 424(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f18, 432(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 192
-; LE-P8-NEXT:    stfd f18, 432(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f19, 440(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 208
-; LE-P8-NEXT:    stfd f19, 440(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f20, 448(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 224
-; LE-P8-NEXT:    stfd f20, 448(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f21, 456(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    li r4, 240
-; LE-P8-NEXT:    stfd f21, 456(r1) # 8-byte Folded Spill
+; LE-P8-NEXT:    stfd f22, 464(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; LE-P8-NEXT:    lwz r4, 12(r3)
-; LE-P8-NEXT:    stfd f22, 464(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stfd f23, 472(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stfd f24, 480(r1) # 8-byte Folded Spill
 ; LE-P8-NEXT:    stfd f25, 488(r1) # 8-byte Folded Spill
@@ -950,10 +950,10 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; LE-P9-O0:       # %bb.0: # %entry
 ; LE-P9-O0-NEXT:    mflr r0
 ; LE-P9-O0-NEXT:    mfcr r12
-; LE-P9-O0-NEXT:    std r0, 16(r1)
-; LE-P9-O0-NEXT:    hashst r0, -488(r1)
 ; LE-P9-O0-NEXT:    stw r12, 8(r1)
 ; LE-P9-O0-NEXT:    stdu r1, -608(r1)
+; LE-P9-O0-NEXT:    std r0, 624(r1)
+; LE-P9-O0-NEXT:    hashst r0, -488(r1)
 ; LE-P9-O0-NEXT:    std r14, 320(r1) # 8-byte Folded Spill
 ; LE-P9-O0-NEXT:    std r15, 328(r1) # 8-byte Folded Spill
 ; LE-P9-O0-NEXT:    std r16, 336(r1) # 8-byte Folded Spill
@@ -1078,10 +1078,10 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; LE-P8-O0:       # %bb.0: # %entry
 ; LE-P8-O0-NEXT:    mflr r0
 ; LE-P8-O0-NEXT:    mfcr r12
-; LE-P8-O0-NEXT:    std r0, 16(r1)
-; LE-P8-O0-NEXT:    hashst r0, -488(r1)
 ; LE-P8-O0-NEXT:    stw r12, 8(r1)
 ; LE-P8-O0-NEXT:    stdu r1, -608(r1)
+; LE-P8-O0-NEXT:    std r0, 624(r1)
+; LE-P8-O0-NEXT:    hashst r0, -488(r1)
 ; LE-P8-O0-NEXT:    std r14, 320(r1) # 8-byte Folded Spill
 ; LE-P8-O0-NEXT:    std r15, 328(r1) # 8-byte Folded Spill
 ; LE-P8-O0-NEXT:    std r16, 336(r1) # 8-byte Folded Spill
@@ -1355,39 +1355,39 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ;
 ; BE-P9-LABEL: spill:
 ; BE-P9:       # %bb.0: # %entry
-; BE-P9-NEXT:    mflr r0
 ; BE-P9-NEXT:    mfcr r12
-; BE-P9-NEXT:    std r0, 16(r1)
-; BE-P9-NEXT:    hashst r0, -488(r1)
+; BE-P9-NEXT:    mflr r0
 ; BE-P9-NEXT:    stw r12, 8(r1)
 ; BE-P9-NEXT:    stdu r1, -624(r1)
-; BE-P9-NEXT:    lwz r4, 12(r3)
+; BE-P9-NEXT:    std r0, 640(r1)
+; BE-P9-NEXT:    hashst r0, -488(r1)
 ; BE-P9-NEXT:    std r14, 336(r1) # 8-byte Folded Spill
-; BE-P9-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v20, 144(r1) # 16-byte Folded Spill
 ; BE-P9-NEXT:    stxv v21, 160(r1) # 16-byte Folded Spill
+; BE-P9-NEXT:    lwz r4, 12(r3)
+; BE-P9-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v22, 176(r1) # 16-byte Folded Spill
 ; BE-P9-NEXT:    std r16, 352(r1) # 8-byte Folded Spill
-; BE-P9-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v23, 192(r1) # 16-byte Folded Spill
-; BE-P9-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
+; BE-P9-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v24, 208(r1) # 16-byte Folded Spill
-; BE-P9-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
+; BE-P9-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v25, 224(r1) # 16-byte Folded Spill
+; BE-P9-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    std r20, 384(r1) # 8-byte Folded Spill
-; BE-P9-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v26, 240(r1) # 16-byte Folded Spill
-; BE-P9-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
+; BE-P9-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v27, 256(r1) # 16-byte Folded Spill
-; BE-P9-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
+; BE-P9-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v28, 272(r1) # 16-byte Folded Spill
+; BE-P9-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    std r24, 416(r1) # 8-byte Folded Spill
-; BE-P9-NEXT:    std r25, 424(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v29, 288(r1) # 16-byte Folded Spill
-; BE-P9-NEXT:    std r26, 432(r1) # 8-byte Folded Spill
+; BE-P9-NEXT:    std r25, 424(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v30, 304(r1) # 16-byte Folded Spill
-; BE-P9-NEXT:    std r27, 440(r1) # 8-byte Folded Spill
+; BE-P9-NEXT:    std r26, 432(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    stxv v31, 320(r1) # 16-byte Folded Spill
+; BE-P9-NEXT:    std r27, 440(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    std r28, 448(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    std r29, 456(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    std r30, 464(r1) # 8-byte Folded Spill
@@ -1484,23 +1484,23 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8:       # %bb.0: # %entry
 ; BE-P8-NEXT:    mfcr r12
 ; BE-P8-NEXT:    mflr r0
-; BE-P8-NEXT:    std r0, 16(r1)
-; BE-P8-NEXT:    hashst r0, -488(r1)
 ; BE-P8-NEXT:    stw r12, 8(r1)
 ; BE-P8-NEXT:    stdu r1, -624(r1)
 ; BE-P8-NEXT:    li r4, 144
+; BE-P8-NEXT:    std r0, 640(r1)
+; BE-P8-NEXT:    hashst r0, -488(r1)
 ; BE-P8-NEXT:    std r14, 336(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r16, 352(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 160
+; BE-P8-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r20, 384(r1) # 8-byte Folded Spill
-; BE-P8-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 176
+; BE-P8-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    std r24, 416(r1) # 8-byte Folded Spill
@@ -1514,33 +1514,33 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 192
+; BE-P8-NEXT:    stfd f14, 480(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 208
-; BE-P8-NEXT:    stfd f14, 480(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f15, 488(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 224
-; BE-P8-NEXT:    stfd f15, 488(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f16, 496(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 240
-; BE-P8-NEXT:    stfd f16, 496(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f17, 504(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 256
-; BE-P8-NEXT:    stfd f17, 504(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f18, 512(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 272
-; BE-P8-NEXT:    stfd f18, 512(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f19, 520(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 288
-; BE-P8-NEXT:    stfd f19, 520(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f20, 528(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 304
-; BE-P8-NEXT:    stfd f20, 528(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f21, 536(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    li r4, 320
-; BE-P8-NEXT:    stfd f21, 536(r1) # 8-byte Folded Spill
+; BE-P8-NEXT:    stfd f22, 544(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; BE-P8-NEXT:    lwz r4, 12(r3)
-; BE-P8-NEXT:    stfd f22, 544(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stfd f23, 552(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stfd f24, 560(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    stfd f25, 568(r1) # 8-byte Folded Spill
@@ -1759,10 +1759,10 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P9-LABEL: spill:
 ; BE-32BIT-P9:       # %bb.0: # %entry
 ; BE-32BIT-P9-NEXT:    mflr r0
-; BE-32BIT-P9-NEXT:    stw r0, 4(r1)
-; BE-32BIT-P9-NEXT:    hashst r0, -424(r1)
 ; BE-32BIT-P9-NEXT:    stwu r1, -448(r1)
 ; BE-32BIT-P9-NEXT:    mfcr r12
+; BE-32BIT-P9-NEXT:    stw r0, 452(r1)
+; BE-32BIT-P9-NEXT:    hashst r0, -424(r1)
 ; BE-32BIT-P9-NEXT:    stw r14, 232(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stw r15, 236(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    stw r16, 240(r1) # 4-byte Folded Spill
@@ -1884,11 +1884,11 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8-LABEL: spill:
 ; BE-32BIT-P8:       # %bb.0: # %entry
 ; BE-32BIT-P8-NEXT:    mflr r0
-; BE-32BIT-P8-NEXT:    stw r0, 4(r1)
-; BE-32BIT-P8-NEXT:    hashst r0, -424(r1)
 ; BE-32BIT-P8-NEXT:    stwu r1, -448(r1)
 ; BE-32BIT-P8-NEXT:    mfcr r12
 ; BE-32BIT-P8-NEXT:    li r4, 32
+; BE-32BIT-P8-NEXT:    stw r0, 452(r1)
+; BE-32BIT-P8-NEXT:    hashst r0, -424(r1)
 ; BE-32BIT-P8-NEXT:    stw r14, 232(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stw r15, 236(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stw r16, 240(r1) # 4-byte Folded Spill
@@ -1910,39 +1910,39 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-32BIT-P8-NEXT:    stw r12, 228(r1)
 ; BE-32BIT-P8-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 48
+; BE-32BIT-P8-NEXT:    stfd f14, 304(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 64
-; BE-32BIT-P8-NEXT:    stfd f14, 304(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f15, 312(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 80
-; BE-32BIT-P8-NEXT:    stfd f15, 312(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f16, 320(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 96
-; BE-32BIT-P8-NEXT:    stfd f16, 320(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f17, 328(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 112
-; BE-32BIT-P8-NEXT:    stfd f17, 328(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f18, 336(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 128
-; BE-32BIT-P8-NEXT:    stfd f18, 336(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f19, 344(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 144
-; BE-32BIT-P8-NEXT:    stfd f19, 344(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f20, 352(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 160
-; BE-32BIT-P8-NEXT:    stfd f20, 352(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f21, 360(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 176
-; BE-32BIT-P8-NEXT:    stfd f21, 360(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f22, 368(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 192
-; BE-32BIT-P8-NEXT:    stfd f22, 368(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f23, 376(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    li r4, 208
-; BE-32BIT-P8-NEXT:    stfd f23, 376(r1) # 8-byte Folded Spill
+; BE-32BIT-P8-NEXT:    stfd f24, 384(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    lwz r4, 12(r3)
-; BE-32BIT-P8-NEXT:    stfd f24, 384(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f25, 392(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f26, 400(r1) # 8-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    stfd f27, 408(r1) # 8-byte Folded Spill
@@ -2158,39 +2158,39 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ;
 ; LE-P9-PRIV-LABEL: spill:
 ; LE-P9-PRIV:       # %bb.0: # %entry
-; LE-P9-PRIV-NEXT:    mflr r0
 ; LE-P9-PRIV-NEXT:    mfcr r12
-; LE-P9-PRIV-NEXT:    std r0, 16(r1)
-; LE-P9-PRIV-NEXT:    hashstp r0, -488(r1)
+; LE-P9-PRIV-NEXT:    mflr r0
 ; LE-P9-PRIV-NEXT:    stw r12, 8(r1)
 ; LE-P9-PRIV-NEXT:    stdu r1, -544(r1)
-; LE-P9-PRIV-NEXT:    lwz r4, 12(r3)
+; LE-P9-PRIV-NEXT:    std r0, 560(r1)
+; LE-P9-PRIV-NEXT:    hashstp r0, -488(r1)
 ; LE-P9-PRIV-NEXT:    std r14, 256(r1) # 8-byte Folded Spill
-; LE-P9-PRIV-NEXT:    std r15, 264(r1) # 8-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    stxv v20, 64(r1) # 16-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    stxv v21, 80(r1) # 16-byte Folded Spill
+; LE-P9-PRIV-NEXT:    lwz r4, 12(r3)
+; LE-P9-PRIV-NEXT:    std r15, 264(r1) # 8-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    stxv v22, 96(r1) # 16-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    std r16, 272(r1) # 8-byte Folded Spill
-; LE-P9-PRIV-NEXT:    std r17, 280(r1) # 8-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    stxv v23, 112(r1) # 16-byte Folded Spill
-; LE-P9-PRIV-NEXT:    std r18, 288(r1) # 8-byte Folded Spill
+; LE-P9-PRIV-NEXT:    std r17, 280(r1) # 8-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    stxv v24, 128(r1) # 16-byte Folded Spill
-; LE-P9-PRIV-NEXT:    std r19, 296(r1) # 8-byte Folded Spill
+; LE-P9-PRIV-NEXT:    std r18, 288(r1) # 8-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    stxv v25, 144(r1) # 16-byte Folded Spill
+; LE-P9-PRIV-NEXT:    std r19, 296(r1) # 8-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    std r20, 304(r1) # 8-byte Folded Spill
-; LE-P9-PRIV-NEXT:    std r21, 312(r1) # 8-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    stxv v26, 160(r1) # 16-byte Folded Spill
-; LE-P9-PRIV-NEXT:    std r22, 320(r1) # 8-byte Folded Spill
+; LE-P9-PRIV-NEXT:    std r21, 312(r1) # 8-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    stxv v27, 176(r1) # 16-byte Folded Spill
-; LE-P9-PRIV-NEXT:    std r23, 328(r1) # 8-byte Folded Spill
+; LE-P9-PRIV-NEXT:    std r22, 320(r1) # 8-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    stxv v28, 192(r1) # 16-byte Folded Spill
+; LE-P9-PRIV-NEXT:    std r23, 328(r1) # 8-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    std r24, 336(r1) # 8-byte Folded Spill
-; LE-P9-PRIV-NEXT:    std r25, 344(r1) # 8-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    stxv v29, 208(r1) # 16-byte Folded Spill
-; LE-P9-PRIV-NEXT:    std r26, 352(r1) # 8-byte Folded Spill
+; LE-P9-PRIV-NEXT:    std r25, 344(r1) # 8-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    stxv v30, 224(r1) # 16-byte Folded Spill
-; LE-P9-PRIV-NEXT:    std r27, 360(r1) # 8-byte Folded Spill
+; LE-P9-PRIV-NEXT:    std r26, 352(r1) # 8-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    stxv v31, 240(r1) # 16-byte Folded Spill
+; LE-P9-PRIV-NEXT:    std r27, 360(r1) # 8-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    std r28, 368(r1) # 8-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    std r29, 376(r1) # 8-byte Folded Spill
 ; LE-P9-PRIV-NEXT:    std r30, 384(r1) # 8-byte Folded Spill
@@ -2287,23 +2287,23 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; LE-P8-PRIV:       # %bb.0: # %entry
 ; LE-P8-PRIV-NEXT:    mfcr r12
 ; LE-P8-PRIV-NEXT:    mflr r0
-; LE-P8-PRIV-NEXT:    std r0, 16(r1)
-; LE-P8-PRIV-NEXT:    hashstp r0, -488(r1)
 ; LE-P8-PRIV-NEXT:    stw r12, 8(r1)
 ; LE-P8-PRIV-NEXT:    stdu r1, -544(r1)
 ; LE-P8-PRIV-NEXT:    li r4, 64
+; LE-P8-PRIV-NEXT:    std r0, 560(r1)
+; LE-P8-PRIV-NEXT:    hashstp r0, -488(r1)
 ; LE-P8-PRIV-NEXT:    std r14, 256(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r15, 264(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r16, 272(r1) # 8-byte Folded Spill
-; LE-P8-PRIV-NEXT:    std r17, 280(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 80
+; LE-P8-PRIV-NEXT:    std r17, 280(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r18, 288(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r19, 296(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r20, 304(r1) # 8-byte Folded Spill
-; LE-P8-PRIV-NEXT:    std r21, 312(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 96
+; LE-P8-PRIV-NEXT:    std r21, 312(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r22, 320(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r23, 328(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    std r24, 336(r1) # 8-byte Folded Spill
@@ -2317,33 +2317,33 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; LE-P8-PRIV-NEXT:    std r3, 40(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 112
+; LE-P8-PRIV-NEXT:    stfd f14, 400(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 128
-; LE-P8-PRIV-NEXT:    stfd f14, 400(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f15, 408(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 144
-; LE-P8-PRIV-NEXT:    stfd f15, 408(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f16, 416(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 160
-; LE-P8-PRIV-NEXT:    stfd f16, 416(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f17, 424(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 176
-; LE-P8-PRIV-NEXT:    stfd f17, 424(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f18, 432(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 192
-; LE-P8-PRIV-NEXT:    stfd f18, 432(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f19, 440(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 208
-; LE-P8-PRIV-NEXT:    stfd f19, 440(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f20, 448(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 224
-; LE-P8-PRIV-NEXT:    stfd f20, 448(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f21, 456(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    li r4, 240
-; LE-P8-PRIV-NEXT:    stfd f21, 456(r1) # 8-byte Folded Spill
+; LE-P8-PRIV-NEXT:    stfd f22, 464(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    lwz r4, 12(r3)
-; LE-P8-PRIV-NEXT:    stfd f22, 464(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stfd f23, 472(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stfd f24, 480(r1) # 8-byte Folded Spill
 ; LE-P8-PRIV-NEXT:    stfd f25, 488(r1) # 8-byte Folded Spill
@@ -2563,39 +2563,39 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ;
 ; BE-P9-PRIV-LABEL: spill:
 ; BE-P9-PRIV:       # %bb.0: # %entry
-; BE-P9-PRIV-NEXT:    mflr r0
 ; BE-P9-PRIV-NEXT:    mfcr r12
-; BE-P9-PRIV-NEXT:    std r0, 16(r1)
-; BE-P9-PRIV-NEXT:    hashstp r0, -488(r1)
+; BE-P9-PRIV-NEXT:    mflr r0
 ; BE-P9-PRIV-NEXT:    stw r12, 8(r1)
 ; BE-P9-PRIV-NEXT:    stdu r1, -624(r1)
-; BE-P9-PRIV-NEXT:    lwz r4, 12(r3)
+; BE-P9-PRIV-NEXT:    std r0, 640(r1)
+; BE-P9-PRIV-NEXT:    hashstp r0, -488(r1)
 ; BE-P9-PRIV-NEXT:    std r14, 336(r1) # 8-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v20, 144(r1) # 16-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v21, 160(r1) # 16-byte Folded Spill
+; BE-P9-PRIV-NEXT:    lwz r4, 12(r3)
+; BE-P9-PRIV-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v22, 176(r1) # 16-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    std r16, 352(r1) # 8-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v23, 192(r1) # 16-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v24, 208(r1) # 16-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v25, 224(r1) # 16-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    std r20, 384(r1) # 8-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v26, 240(r1) # 16-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v27, 256(r1) # 16-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v28, 272(r1) # 16-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    std r24, 416(r1) # 8-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r25, 424(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v29, 288(r1) # 16-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r26, 432(r1) # 8-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r25, 424(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v30, 304(r1) # 16-byte Folded Spill
-; BE-P9-PRIV-NEXT:    std r27, 440(r1) # 8-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r26, 432(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    stxv v31, 320(r1) # 16-byte Folded Spill
+; BE-P9-PRIV-NEXT:    std r27, 440(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    std r28, 448(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    std r29, 456(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    std r30, 464(r1) # 8-byte Folded Spill
@@ -2692,23 +2692,23 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-PRIV:       # %bb.0: # %entry
 ; BE-P8-PRIV-NEXT:    mfcr r12
 ; BE-P8-PRIV-NEXT:    mflr r0
-; BE-P8-PRIV-NEXT:    std r0, 16(r1)
-; BE-P8-PRIV-NEXT:    hashstp r0, -488(r1)
 ; BE-P8-PRIV-NEXT:    stw r12, 8(r1)
 ; BE-P8-PRIV-NEXT:    stdu r1, -624(r1)
 ; BE-P8-PRIV-NEXT:    li r4, 144
+; BE-P8-PRIV-NEXT:    std r0, 640(r1)
+; BE-P8-PRIV-NEXT:    hashstp r0, -488(r1)
 ; BE-P8-PRIV-NEXT:    std r14, 336(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r15, 344(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r16, 352(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 160
+; BE-P8-PRIV-NEXT:    std r17, 360(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r18, 368(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r19, 376(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r20, 384(r1) # 8-byte Folded Spill
-; BE-P8-PRIV-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 176
+; BE-P8-PRIV-NEXT:    std r21, 392(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r22, 400(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r23, 408(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    std r24, 416(r1) # 8-byte Folded Spill
@@ -2722,33 +2722,33 @@ define dso_local zeroext i32 @spill(ptr nocapture readonly %in) #0 {
 ; BE-P8-PRIV-NEXT:    std r3, 120(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 192
+; BE-P8-PRIV-NEXT:    stfd f14, 480(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 208
-; BE-P8-PRIV-NEXT:    stfd f14, 480(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f15, 488(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 224
-; BE-P8-PRIV-NEXT:    stfd f15, 488(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f16, 496(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 240
-; BE-P8-PRIV-NEXT:    stfd f16, 496(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f17, 504(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 256
-; BE-P8-PRIV-NEXT:    stfd f17, 504(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f18, 512(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 272
-; BE-P8-PRIV-NEXT:    stfd f18, 512(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f19, 520(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 288
-; BE-P8-PRIV-NEXT:    stfd f19, 520(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f20, 528(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 304
-; BE-P8-PRIV-NEXT:    stfd f20, 528(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f21, 536(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    li r4, 320
-; BE-P8-PRIV-NEXT:    stfd f21, 536(r1) # 8-byte Folded Spill
+; BE-P8-PRIV-NEXT:    stfd f22, 544(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    lwz r4, 12(r3)
-; BE-P8-PRIV-NEXT:    stfd f22, 544(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stfd f23, 552(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stfd f24, 560(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    stfd f25, 568(r1) # 8-byte Folded Spill
@@ -2889,9 +2889,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P9-NEXT:  # %bb.1: # %if.end
 ; LE-P9-NEXT:    mflr r0
 ; LE-P9-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; LE-P9-NEXT:    std r0, 16(r1)
-; LE-P9-NEXT:    hashst r0, -24(r1)
 ; LE-P9-NEXT:    stdu r1, -64(r1)
+; LE-P9-NEXT:    std r0, 80(r1)
+; LE-P9-NEXT:    hashst r0, -24(r1)
 ; LE-P9-NEXT:    mr r30, r3
 ; LE-P9-NEXT:    lwz r3, 12(r3)
 ; LE-P9-NEXT:    stw r3, 36(r1)
@@ -2918,9 +2918,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P8-NEXT:  # %bb.1: # %if.end
 ; LE-P8-NEXT:    mflr r0
 ; LE-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; LE-P8-NEXT:    std r0, 16(r1)
-; LE-P8-NEXT:    hashst r0, -24(r1)
 ; LE-P8-NEXT:    stdu r1, -64(r1)
+; LE-P8-NEXT:    std r0, 80(r1)
+; LE-P8-NEXT:    hashst r0, -24(r1)
 ; LE-P8-NEXT:    mr r30, r3
 ; LE-P8-NEXT:    lwz r3, 12(r3)
 ; LE-P8-NEXT:    stw r3, 36(r1)
@@ -2975,9 +2975,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P9-O0-LABEL: shrinkwrap:
 ; LE-P9-O0:       # %bb.0: # %entry
 ; LE-P9-O0-NEXT:    mflr r0
-; LE-P9-O0-NEXT:    std r0, 16(r1)
-; LE-P9-O0-NEXT:    hashst r0, -8(r1)
 ; LE-P9-O0-NEXT:    stdu r1, -128(r1)
+; LE-P9-O0-NEXT:    std r0, 144(r1)
+; LE-P9-O0-NEXT:    hashst r0, -8(r1)
 ; LE-P9-O0-NEXT:    mr. r4, r3
 ; LE-P9-O0-NEXT:    std r4, 104(r1) # 8-byte Folded Spill
 ; LE-P9-O0-NEXT:    li r3, 0
@@ -3007,9 +3007,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P8-O0-LABEL: shrinkwrap:
 ; LE-P8-O0:       # %bb.0: # %entry
 ; LE-P8-O0-NEXT:    mflr r0
-; LE-P8-O0-NEXT:    std r0, 16(r1)
-; LE-P8-O0-NEXT:    hashst r0, -8(r1)
 ; LE-P8-O0-NEXT:    stdu r1, -128(r1)
+; LE-P8-O0-NEXT:    std r0, 144(r1)
+; LE-P8-O0-NEXT:    hashst r0, -8(r1)
 ; LE-P8-O0-NEXT:    mr. r4, r3
 ; LE-P8-O0-NEXT:    std r4, 104(r1) # 8-byte Folded Spill
 ; LE-P8-O0-NEXT:    li r3, 0
@@ -3071,9 +3071,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P9-NEXT:    beq cr0, .LBB2_2
 ; BE-P9-NEXT:  # %bb.1: # %if.end
 ; BE-P9-NEXT:    mflr r0
-; BE-P9-NEXT:    std r0, 16(r1)
-; BE-P9-NEXT:    hashst r0, -24(r1)
 ; BE-P9-NEXT:    stdu r1, -144(r1)
+; BE-P9-NEXT:    std r0, 160(r1)
+; BE-P9-NEXT:    hashst r0, -24(r1)
 ; BE-P9-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; BE-P9-NEXT:    mr r30, r3
 ; BE-P9-NEXT:    lwz r3, 12(r3)
@@ -3100,9 +3100,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P8-NEXT:    beq cr0, .LBB2_2
 ; BE-P8-NEXT:  # %bb.1: # %if.end
 ; BE-P8-NEXT:    mflr r0
-; BE-P8-NEXT:    std r0, 16(r1)
-; BE-P8-NEXT:    hashst r0, -24(r1)
 ; BE-P8-NEXT:    stdu r1, -144(r1)
+; BE-P8-NEXT:    std r0, 160(r1)
+; BE-P8-NEXT:    hashst r0, -24(r1)
 ; BE-P8-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; BE-P8-NEXT:    mr r30, r3
 ; BE-P8-NEXT:    lwz r3, 12(r3)
@@ -3154,10 +3154,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P9-LABEL: shrinkwrap:
 ; BE-32BIT-P9:       # %bb.0: # %entry
 ; BE-32BIT-P9-NEXT:    mflr r0
-; BE-32BIT-P9-NEXT:    stw r0, 4(r1)
-; BE-32BIT-P9-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P9-NEXT:    stwu r1, -32(r1)
 ; BE-32BIT-P9-NEXT:    cmplwi r3, 0
+; BE-32BIT-P9-NEXT:    stw r0, 36(r1)
+; BE-32BIT-P9-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P9-NEXT:    stw r30, 24(r1) # 4-byte Folded Spill
 ; BE-32BIT-P9-NEXT:    beq cr0, .LBB2_2
 ; BE-32BIT-P9-NEXT:  # %bb.1: # %if.end
@@ -3182,10 +3182,10 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-32BIT-P8-LABEL: shrinkwrap:
 ; BE-32BIT-P8:       # %bb.0: # %entry
 ; BE-32BIT-P8-NEXT:    mflr r0
-; BE-32BIT-P8-NEXT:    stw r0, 4(r1)
-; BE-32BIT-P8-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P8-NEXT:    stwu r1, -32(r1)
 ; BE-32BIT-P8-NEXT:    cmplwi r3, 0
+; BE-32BIT-P8-NEXT:    stw r0, 36(r1)
+; BE-32BIT-P8-NEXT:    hashst r0, -16(r1)
 ; BE-32BIT-P8-NEXT:    stw r30, 24(r1) # 4-byte Folded Spill
 ; BE-32BIT-P8-NEXT:    beq cr0, .LBB2_2
 ; BE-32BIT-P8-NEXT:  # %bb.1: # %if.end
@@ -3242,9 +3242,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P9-PRIV-NEXT:  # %bb.1: # %if.end
 ; LE-P9-PRIV-NEXT:    mflr r0
 ; LE-P9-PRIV-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; LE-P9-PRIV-NEXT:    std r0, 16(r1)
-; LE-P9-PRIV-NEXT:    hashstp r0, -24(r1)
 ; LE-P9-PRIV-NEXT:    stdu r1, -64(r1)
+; LE-P9-PRIV-NEXT:    std r0, 80(r1)
+; LE-P9-PRIV-NEXT:    hashstp r0, -24(r1)
 ; LE-P9-PRIV-NEXT:    mr r30, r3
 ; LE-P9-PRIV-NEXT:    lwz r3, 12(r3)
 ; LE-P9-PRIV-NEXT:    stw r3, 36(r1)
@@ -3271,9 +3271,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; LE-P8-PRIV-NEXT:  # %bb.1: # %if.end
 ; LE-P8-PRIV-NEXT:    mflr r0
 ; LE-P8-PRIV-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; LE-P8-PRIV-NEXT:    std r0, 16(r1)
-; LE-P8-PRIV-NEXT:    hashstp r0, -24(r1)
 ; LE-P8-PRIV-NEXT:    stdu r1, -64(r1)
+; LE-P8-PRIV-NEXT:    std r0, 80(r1)
+; LE-P8-PRIV-NEXT:    hashstp r0, -24(r1)
 ; LE-P8-PRIV-NEXT:    mr r30, r3
 ; LE-P8-PRIV-NEXT:    lwz r3, 12(r3)
 ; LE-P8-PRIV-NEXT:    stw r3, 36(r1)
@@ -3328,9 +3328,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P9-PRIV-NEXT:    beq cr0, .LBB2_2
 ; BE-P9-PRIV-NEXT:  # %bb.1: # %if.end
 ; BE-P9-PRIV-NEXT:    mflr r0
-; BE-P9-PRIV-NEXT:    std r0, 16(r1)
-; BE-P9-PRIV-NEXT:    hashstp r0, -24(r1)
 ; BE-P9-PRIV-NEXT:    stdu r1, -144(r1)
+; BE-P9-PRIV-NEXT:    std r0, 160(r1)
+; BE-P9-PRIV-NEXT:    hashstp r0, -24(r1)
 ; BE-P9-PRIV-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; BE-P9-PRIV-NEXT:    mr r30, r3
 ; BE-P9-PRIV-NEXT:    lwz r3, 12(r3)
@@ -3357,9 +3357,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
 ; BE-P8-PRIV-NEXT:    beq cr0, .LBB2_2
 ; BE-P8-PRIV-NEXT:  # %bb.1: # %if.end
 ; BE-P8-PRIV-NEXT:    mflr r0
-; BE-P8-PRIV-NEXT:    std r0, 16(r1)
-; BE-P8-PRIV-NEXT:    hashstp r0, -24(r1)
 ; BE-P8-PRIV-NEXT:    stdu r1, -144(r1)
+; BE-P8-PRIV-NEXT:    std r0, 160(r1)
+; BE-P8-PRIV-NEXT:    hashstp r0, -24(r1)
 ; BE-P8-PRIV-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
 ; BE-P8-PRIV-NEXT:    mr r30, r3
 ; BE-P8-PRIV-NEXT:    lwz r3, 12(r3)
@@ -3577,9 +3577,9 @@ define dso_local zeroext i32 @aligned(ptr nocapture readonly %in) #0 {
 ; LE-P9-O0:       # %bb.0: # %entry
 ; LE-P9-O0-NEXT:    mflr r0
 ; LE-P9-O0-NEXT:    std r30, -16(r1)
+; LE-P9-O0-NEXT:    mr r30, r1
 ; LE-P9-O0-NEXT:    std r0, 16(r1)
 ; LE-P9-O0-NEXT:    hashst r0, -24(r1)
-; LE-P9-O0-NEXT:    mr r30, r1
 ; LE-P9-O0-NEXT:    clrldi r0, r1, 49
 ; LE-P9-O0-NEXT:    lis r12, -1
 ; LE-P9-O0-NEXT:    subc r0, r12, r0
@@ -3620,9 +3620,9 @@ define dso_local zeroext i32 @aligned(ptr nocapture readonly %in) #0 {
 ; LE-P8-O0:       # %bb.0: # %entry
 ; LE-P8-O0-NEXT:    mflr r0
 ; LE-P8-O0-NEXT:    std r30, -16(r1)
+; LE-P8-O0-NEXT:    mr r30, r1
 ; LE-P8-O0-NEXT:    std r0, 16(r1)
 ; LE-P8-O0-NEXT:    hashst r0, -24(r1)
-; LE-P8-O0-NEXT:    mr r30, r1
 ; LE-P8-O0-NEXT:    clrldi r0, r1, 49
 ; LE-P8-O0-NEXT:    lis r12, -1
 ; LE-P8-O0-NEXT:    subc r0, r12, r0

diff  --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
index 22d29af9b73be..bfb4f05c231b4 100644
--- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
@@ -7,8 +7,8 @@ define ppc_fp128 @test_fadd_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64LE-LABEL: test_fadd_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl __gcc_qadd
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -19,8 +19,8 @@ define ppc_fp128 @test_fadd_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64LE9-LABEL: test_fadd_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl __gcc_qadd
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -31,8 +31,8 @@ define ppc_fp128 @test_fadd_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64-LABEL: test_fadd_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl __gcc_qadd
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -52,8 +52,8 @@ define ppc_fp128 @test_fsub_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64LE-LABEL: test_fsub_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl __gcc_qsub
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -64,8 +64,8 @@ define ppc_fp128 @test_fsub_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64LE9-LABEL: test_fsub_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl __gcc_qsub
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -76,8 +76,8 @@ define ppc_fp128 @test_fsub_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64-LABEL: test_fsub_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl __gcc_qsub
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -97,8 +97,8 @@ define ppc_fp128 @test_fmul_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64LE-LABEL: test_fmul_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl __gcc_qmul
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -109,8 +109,8 @@ define ppc_fp128 @test_fmul_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64LE9-LABEL: test_fmul_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl __gcc_qmul
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -121,8 +121,8 @@ define ppc_fp128 @test_fmul_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64-LABEL: test_fmul_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl __gcc_qmul
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -142,8 +142,8 @@ define ppc_fp128 @test_fdiv_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64LE-LABEL: test_fdiv_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl __gcc_qdiv
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -154,8 +154,8 @@ define ppc_fp128 @test_fdiv_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64LE9-LABEL: test_fdiv_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl __gcc_qdiv
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -166,8 +166,8 @@ define ppc_fp128 @test_fdiv_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64-LABEL: test_fdiv_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl __gcc_qdiv
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -187,8 +187,8 @@ define ppc_fp128 @test_frem_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64LE-LABEL: test_frem_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl fmodl
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -199,8 +199,8 @@ define ppc_fp128 @test_frem_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64LE9-LABEL: test_frem_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl fmodl
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -211,8 +211,8 @@ define ppc_fp128 @test_frem_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64-LABEL: test_frem_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl fmodl
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -232,8 +232,8 @@ define ppc_fp128 @test_fma_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second, ppc_fp
 ; PC64LE-LABEL: test_fma_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl fmal
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -244,8 +244,8 @@ define ppc_fp128 @test_fma_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second, ppc_fp
 ; PC64LE9-LABEL: test_fma_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl fmal
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -256,8 +256,8 @@ define ppc_fp128 @test_fma_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second, ppc_fp
 ; PC64-LABEL: test_fma_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl fmal
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -278,8 +278,8 @@ define ppc_fp128 @test_sqrt_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_sqrt_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl sqrtl
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -290,8 +290,8 @@ define ppc_fp128 @test_sqrt_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9-LABEL: test_sqrt_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl sqrtl
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -302,8 +302,8 @@ define ppc_fp128 @test_sqrt_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64-LABEL: test_sqrt_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl sqrtl
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -322,8 +322,8 @@ define ppc_fp128 @test_pow_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64LE-LABEL: test_pow_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl powl
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -334,8 +334,8 @@ define ppc_fp128 @test_pow_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64LE9-LABEL: test_pow_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl powl
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -346,8 +346,8 @@ define ppc_fp128 @test_pow_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0 {
 ; PC64-LABEL: test_pow_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl powl
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -367,9 +367,9 @@ define ppc_fp128 @test_powi_ppc_fp128(ppc_fp128 %first, i32 %second) #0 {
 ; PC64LE-LABEL: test_powi_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
 ; PC64LE-NEXT:    clrldi 5, 5, 32
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl __powitf2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -380,9 +380,9 @@ define ppc_fp128 @test_powi_ppc_fp128(ppc_fp128 %first, i32 %second) #0 {
 ; PC64LE9-LABEL: test_powi_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
 ; PC64LE9-NEXT:    clrldi 5, 5, 32
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl __powitf2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -393,9 +393,9 @@ define ppc_fp128 @test_powi_ppc_fp128(ppc_fp128 %first, i32 %second) #0 {
 ; PC64-LABEL: test_powi_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
 ; PC64-NEXT:    clrldi 5, 5, 32
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl __powitf2
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -415,8 +415,8 @@ define ppc_fp128 @test_sin_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_sin_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl sinl
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -427,8 +427,8 @@ define ppc_fp128 @test_sin_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9-LABEL: test_sin_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl sinl
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -439,8 +439,8 @@ define ppc_fp128 @test_sin_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64-LABEL: test_sin_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl sinl
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -459,8 +459,8 @@ define ppc_fp128 @test_cos_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_cos_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl cosl
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -471,8 +471,8 @@ define ppc_fp128 @test_cos_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9-LABEL: test_cos_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl cosl
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -483,8 +483,8 @@ define ppc_fp128 @test_cos_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64-LABEL: test_cos_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl cosl
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -503,8 +503,8 @@ define ppc_fp128 @test_exp_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_exp_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl expl
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -515,8 +515,8 @@ define ppc_fp128 @test_exp_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9-LABEL: test_exp_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl expl
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -527,8 +527,8 @@ define ppc_fp128 @test_exp_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64-LABEL: test_exp_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl expl
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -547,8 +547,8 @@ define ppc_fp128 @test_exp2_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_exp2_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl exp2l
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -559,8 +559,8 @@ define ppc_fp128 @test_exp2_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9-LABEL: test_exp2_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl exp2l
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -571,8 +571,8 @@ define ppc_fp128 @test_exp2_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64-LABEL: test_exp2_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl exp2l
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -591,8 +591,8 @@ define ppc_fp128 @test_log_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_log_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl logl
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -603,8 +603,8 @@ define ppc_fp128 @test_log_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9-LABEL: test_log_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl logl
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -615,8 +615,8 @@ define ppc_fp128 @test_log_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64-LABEL: test_log_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl logl
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -635,8 +635,8 @@ define ppc_fp128 @test_log2_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_log2_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl log2l
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -647,8 +647,8 @@ define ppc_fp128 @test_log2_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9-LABEL: test_log2_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl log2l
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -659,8 +659,8 @@ define ppc_fp128 @test_log2_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64-LABEL: test_log2_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl log2l
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -679,8 +679,8 @@ define ppc_fp128 @test_log10_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_log10_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl log10l
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -691,8 +691,8 @@ define ppc_fp128 @test_log10_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9-LABEL: test_log10_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl log10l
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -703,8 +703,8 @@ define ppc_fp128 @test_log10_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64-LABEL: test_log10_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl log10l
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -723,8 +723,8 @@ define ppc_fp128 @test_rint_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_rint_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl rintl
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -735,8 +735,8 @@ define ppc_fp128 @test_rint_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9-LABEL: test_rint_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl rintl
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -747,8 +747,8 @@ define ppc_fp128 @test_rint_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64-LABEL: test_rint_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl rintl
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -767,8 +767,8 @@ define ppc_fp128 @test_nearbyint_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_nearbyint_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl nearbyintl
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -779,8 +779,8 @@ define ppc_fp128 @test_nearbyint_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9-LABEL: test_nearbyint_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl nearbyintl
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -791,8 +791,8 @@ define ppc_fp128 @test_nearbyint_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64-LABEL: test_nearbyint_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl nearbyintl
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -811,8 +811,8 @@ define ppc_fp128 @test_maxnum_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0
 ; PC64LE-LABEL: test_maxnum_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl fmaxl
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -823,8 +823,8 @@ define ppc_fp128 @test_maxnum_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0
 ; PC64LE9-LABEL: test_maxnum_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl fmaxl
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -835,8 +835,8 @@ define ppc_fp128 @test_maxnum_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0
 ; PC64-LABEL: test_maxnum_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl fmaxl
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -855,8 +855,8 @@ define ppc_fp128 @test_minnum_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0
 ; PC64LE-LABEL: test_minnum_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl fminl
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -867,8 +867,8 @@ define ppc_fp128 @test_minnum_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0
 ; PC64LE9-LABEL: test_minnum_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl fminl
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -879,8 +879,8 @@ define ppc_fp128 @test_minnum_ppc_fp128(ppc_fp128 %first, ppc_fp128 %second) #0
 ; PC64-LABEL: test_minnum_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl fminl
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -899,8 +899,8 @@ define ppc_fp128 @test_ceil_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_ceil_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl ceill
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -911,8 +911,8 @@ define ppc_fp128 @test_ceil_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9-LABEL: test_ceil_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl ceill
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -923,8 +923,8 @@ define ppc_fp128 @test_ceil_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64-LABEL: test_ceil_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl ceill
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -942,8 +942,8 @@ define ppc_fp128 @test_floor_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_floor_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl floorl
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -954,8 +954,8 @@ define ppc_fp128 @test_floor_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9-LABEL: test_floor_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl floorl
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -966,8 +966,8 @@ define ppc_fp128 @test_floor_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64-LABEL: test_floor_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl floorl
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -985,8 +985,8 @@ define ppc_fp128 @test_round_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_round_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl roundl
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -997,8 +997,8 @@ define ppc_fp128 @test_round_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9-LABEL: test_round_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl roundl
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -1009,8 +1009,8 @@ define ppc_fp128 @test_round_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64-LABEL: test_round_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl roundl
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -1028,8 +1028,8 @@ define ppc_fp128 @test_trunc_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_trunc_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl truncl
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -1040,8 +1040,8 @@ define ppc_fp128 @test_trunc_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9-LABEL: test_trunc_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl truncl
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -1052,8 +1052,8 @@ define ppc_fp128 @test_trunc_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64-LABEL: test_trunc_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl truncl
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -1160,8 +1160,8 @@ define i64 @test_fptosi_ppc_i64_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_fptosi_ppc_i64_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl __fixtfdi
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -1172,8 +1172,8 @@ define i64 @test_fptosi_ppc_i64_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9-LABEL: test_fptosi_ppc_i64_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl __fixtfdi
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -1184,8 +1184,8 @@ define i64 @test_fptosi_ppc_i64_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64-LABEL: test_fptosi_ppc_i64_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl __fixtfdi
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -1244,8 +1244,8 @@ define i64 @test_fptoui_ppc_i64_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE-LABEL: test_fptoui_ppc_i64_ppc_fp128:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl __fixunstfdi
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -1256,8 +1256,8 @@ define i64 @test_fptoui_ppc_i64_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9-LABEL: test_fptoui_ppc_i64_ppc_fp128:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl __fixunstfdi
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -1268,8 +1268,8 @@ define i64 @test_fptoui_ppc_i64_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64-LABEL: test_fptoui_ppc_i64_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl __fixunstfdi
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -1288,10 +1288,10 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
 ; PC64LE-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -48(1)
 ; PC64LE-NEXT:    addis 3, 2, .LCPI31_0@toc@ha
 ; PC64LE-NEXT:    xxlxor 3, 3, 3
+; PC64LE-NEXT:    std 0, 64(1)
 ; PC64LE-NEXT:    lfs 0, .LCPI31_0@toc@l(3)
 ; PC64LE-NEXT:    lis 3, -32768
 ; PC64LE-NEXT:    fcmpo 0, 2, 3
@@ -1326,15 +1326,15 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 {
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
 ; PC64LE9-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI31_0@toc@ha
 ; PC64LE9-NEXT:    xxlxor 3, 3, 3
+; PC64LE9-NEXT:    std 0, 64(1)
 ; PC64LE9-NEXT:    lfs 0, .LCPI31_0@toc@l(3)
 ; PC64LE9-NEXT:    fcmpo 1, 2, 3
 ; PC64LE9-NEXT:    lis 3, -32768
-; PC64LE9-NEXT:    fcmpo 0, 1, 0
 ; PC64LE9-NEXT:    xxlxor 3, 3, 3
+; PC64LE9-NEXT:    fcmpo 0, 1, 0
 ; PC64LE9-NEXT:    crand 20, 2, 4
 ; PC64LE9-NEXT:    crandc 21, 0, 2
 ; PC64LE9-NEXT:    cror 20, 21, 20
@@ -1362,12 +1362,12 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 {
 ;
 ; PC64-LABEL: test_fptoui_ppc_i32_ppc_fp128:
 ; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    mfcr 12
+; PC64-NEXT:    mflr 0
 ; PC64-NEXT:    stw 12, 8(1)
 ; PC64-NEXT:    stdu 1, -128(1)
 ; PC64-NEXT:    addis 3, 2, .LCPI31_0@toc@ha
+; PC64-NEXT:    std 0, 144(1)
 ; PC64-NEXT:    lfs 0, .LCPI31_0@toc@l(3)
 ; PC64-NEXT:    addis 3, 2, .LCPI31_1@toc@ha
 ; PC64-NEXT:    lfs 4, .LCPI31_1@toc@l(3)
@@ -1422,8 +1422,8 @@ define void @test_constrained_libcall_multichain(ptr %firstptr, ptr %result) #0
 ; PC64LE-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    mr 29, 3
 ; PC64LE-NEXT:    xxlxor 2, 2, 2
 ; PC64LE-NEXT:    li 3, 0
@@ -1473,14 +1473,14 @@ define void @test_constrained_libcall_multichain(ptr %firstptr, ptr %result) #0
 ; PC64LE9-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
-; PC64LE9-NEXT:    lfs 31, 0(3)
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    mr 29, 3
-; PC64LE9-NEXT:    li 3, 0
 ; PC64LE9-NEXT:    xxlxor 2, 2, 2
-; PC64LE9-NEXT:    xxlxor 4, 4, 4
 ; PC64LE9-NEXT:    mr 30, 4
+; PC64LE9-NEXT:    lfs 31, 0(3)
+; PC64LE9-NEXT:    li 3, 0
+; PC64LE9-NEXT:    xxlxor 4, 4, 4
 ; PC64LE9-NEXT:    std 3, 8(4)
 ; PC64LE9-NEXT:    fmr 1, 31
 ; PC64LE9-NEXT:    fmr 3, 31
@@ -1519,8 +1519,8 @@ define void @test_constrained_libcall_multichain(ptr %firstptr, ptr %result) #0
 ; PC64-LABEL: test_constrained_libcall_multichain:
 ; PC64:       # %bb.0:
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -176(1)
+; PC64-NEXT:    std 0, 192(1)
 ; PC64-NEXT:    std 29, 120(1) # 8-byte Folded Spill
 ; PC64-NEXT:    mr 29, 3
 ; PC64-NEXT:    li 3, 0
@@ -1643,8 +1643,8 @@ define ppc_fp128 @i64_to_ppcq(i64 %m) #0 {
 ; PC64LE-LABEL: i64_to_ppcq:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl __floatditf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -1655,8 +1655,8 @@ define ppc_fp128 @i64_to_ppcq(i64 %m) #0 {
 ; PC64LE9-LABEL: i64_to_ppcq:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl __floatditf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -1667,8 +1667,8 @@ define ppc_fp128 @i64_to_ppcq(i64 %m) #0 {
 ; PC64-LABEL: i64_to_ppcq:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl __floatditf
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -1719,8 +1719,8 @@ define ppc_fp128 @u64_to_ppcq(i64 %m) #0 {
 ; PC64LE-NEXT:    std 30, -32(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    std 0, 80(1)
 ; PC64LE-NEXT:    mr 30, 3
 ; PC64LE-NEXT:    bl __floatditf
 ; PC64LE-NEXT:    nop
@@ -1754,8 +1754,8 @@ define ppc_fp128 @u64_to_ppcq(i64 %m) #0 {
 ; PC64LE9-NEXT:    std 30, -32(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    mr 30, 3
 ; PC64LE9-NEXT:    bl __floatditf
 ; PC64LE9-NEXT:    nop
@@ -1786,8 +1786,8 @@ define ppc_fp128 @u64_to_ppcq(i64 %m) #0 {
 ; PC64-LABEL: u64_to_ppcq:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    std 0, 160(1)
 ; PC64-NEXT:    std 30, 112(1) # 8-byte Folded Spill
 ; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
 ; PC64-NEXT:    mr 30, 3
@@ -1827,8 +1827,8 @@ define ppc_fp128 @i128_to_ppcq(i128 %m) #0 {
 ; PC64LE-LABEL: i128_to_ppcq:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl __floattitf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -1839,8 +1839,8 @@ define ppc_fp128 @i128_to_ppcq(i128 %m) #0 {
 ; PC64LE9-LABEL: i128_to_ppcq:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl __floattitf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -1851,8 +1851,8 @@ define ppc_fp128 @i128_to_ppcq(i128 %m) #0 {
 ; PC64-LABEL: i128_to_ppcq:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    bl __floattitf
 ; PC64-NEXT:    nop
 ; PC64-NEXT:    addi 1, 1, 112
@@ -1871,8 +1871,8 @@ define ppc_fp128 @u128_to_ppcq(i128 %m) #0 {
 ; PC64LE-NEXT:    std 30, -32(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    std 0, 80(1)
 ; PC64LE-NEXT:    mr 30, 4
 ; PC64LE-NEXT:    bl __floattitf
 ; PC64LE-NEXT:    nop
@@ -1906,8 +1906,8 @@ define ppc_fp128 @u128_to_ppcq(i128 %m) #0 {
 ; PC64LE9-NEXT:    std 30, -32(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    mr 30, 4
 ; PC64LE9-NEXT:    bl __floattitf
 ; PC64LE9-NEXT:    nop
@@ -1938,8 +1938,8 @@ define ppc_fp128 @u128_to_ppcq(i128 %m) #0 {
 ; PC64-LABEL: u128_to_ppcq:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    std 0, 160(1)
 ; PC64-NEXT:    std 30, 112(1) # 8-byte Folded Spill
 ; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
 ; PC64-NEXT:    mr 30, 3
@@ -1979,8 +1979,8 @@ define i1 @ppcq_to_s1(ppc_fp128 %a) {
 ; PC64LE-LABEL: ppcq_to_s1:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    .cfi_def_cfa_offset 32
 ; PC64LE-NEXT:    .cfi_offset lr, 16
 ; PC64LE-NEXT:    bl __gcc_qtou
@@ -1993,8 +1993,8 @@ define i1 @ppcq_to_s1(ppc_fp128 %a) {
 ; PC64LE9-LABEL: ppcq_to_s1:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
 ; PC64LE9-NEXT:    .cfi_offset lr, 16
 ; PC64LE9-NEXT:    bl __gcc_qtou
@@ -2007,8 +2007,8 @@ define i1 @ppcq_to_s1(ppc_fp128 %a) {
 ; PC64-LABEL: ppcq_to_s1:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    .cfi_def_cfa_offset 112
 ; PC64-NEXT:    .cfi_offset lr, 16
 ; PC64-NEXT:    bl __gcc_qtou
@@ -2026,8 +2026,8 @@ define i1 @ppcq_to_u1(ppc_fp128 %a) {
 ; PC64LE-LABEL: ppcq_to_u1:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    .cfi_def_cfa_offset 32
 ; PC64LE-NEXT:    .cfi_offset lr, 16
 ; PC64LE-NEXT:    bl __fixunstfsi
@@ -2040,8 +2040,8 @@ define i1 @ppcq_to_u1(ppc_fp128 %a) {
 ; PC64LE9-LABEL: ppcq_to_u1:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
 ; PC64LE9-NEXT:    .cfi_offset lr, 16
 ; PC64LE9-NEXT:    bl __fixunstfsi
@@ -2054,8 +2054,8 @@ define i1 @ppcq_to_u1(ppc_fp128 %a) {
 ; PC64-LABEL: ppcq_to_u1:
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
 ; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    std 0, 128(1)
 ; PC64-NEXT:    .cfi_def_cfa_offset 112
 ; PC64-NEXT:    .cfi_offset lr, 16
 ; PC64-NEXT:    bl __fixunstfsi

diff  --git a/llvm/test/CodeGen/PowerPC/ppcf128-endian.ll b/llvm/test/CodeGen/PowerPC/ppcf128-endian.ll
index dc6b11160b8b6..1b1c8bf02efa9 100644
--- a/llvm/test/CodeGen/PowerPC/ppcf128-endian.ll
+++ b/llvm/test/CodeGen/PowerPC/ppcf128-endian.ll
@@ -28,8 +28,8 @@ define void @caller() {
 ; CHECK-LABEL: caller:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
@@ -54,8 +54,8 @@ define void @caller_const() {
 ; CHECK-LABEL: caller_const:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
@@ -90,8 +90,8 @@ define void @use_result() {
 ; CHECK-LABEL: use_result:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl test_result
@@ -116,8 +116,8 @@ define void @caller_result() {
 ; CHECK-LABEL: caller_result:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl test_result

diff  --git a/llvm/test/CodeGen/PowerPC/pr33547.ll b/llvm/test/CodeGen/PowerPC/pr33547.ll
index 0df1e35a18bfa..5f50fc10454ae 100644
--- a/llvm/test/CodeGen/PowerPC/pr33547.ll
+++ b/llvm/test/CodeGen/PowerPC/pr33547.ll
@@ -11,8 +11,8 @@ define void @main() {
 ; CHECK-LABEL: main:
 ; CHECK:       # %bb.0: # %L.entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
@@ -47,8 +47,8 @@ define void @testFunc(ptr nocapture %r, ptr nocapture readonly %k) {
 ; CHECK-LABEL: testFunc:
 ; CHECK:       # %bb.0: # %L.entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    std 0, 48(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl .L2$pb

diff  --git a/llvm/test/CodeGen/PowerPC/pr36292.ll b/llvm/test/CodeGen/PowerPC/pr36292.ll
index 3cdc8b623e120..d26fd32d91f6f 100644
--- a/llvm/test/CodeGen/PowerPC/pr36292.ll
+++ b/llvm/test/CodeGen/PowerPC/pr36292.ll
@@ -10,8 +10,8 @@ define void @test() nounwind comdat {
 ; CHECK-NEXT:    mflr 0
 ; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -64(1)
+; CHECK-NEXT:    std 0, 80(1)
 ; CHECK-NEXT:    ld 29, 0(3)
 ; CHECK-NEXT:    ld 30, 32(1)
 ; CHECK-NEXT:    cmpld 30, 29

diff  --git a/llvm/test/CodeGen/PowerPC/pr41088.ll b/llvm/test/CodeGen/PowerPC/pr41088.ll
index 9966d8251b714..fb8d330f87eea 100644
--- a/llvm/test/CodeGen/PowerPC/pr41088.ll
+++ b/llvm/test/CodeGen/PowerPC/pr41088.ll
@@ -33,17 +33,10 @@ declare dso_local fastcc { ptr, ptr } @test2(ptr) unnamed_addr
 
 define void @test(ptr %arg, ptr %arg1, ptr %arg2) unnamed_addr personality ptr @personality {
 ; CHECK-LABEL: test:
-; CHECK:         .cfi_personality 148, DW.ref.personality
-; CHECK-NEXT:    .cfi_lsda 20, .Lexception0
-; CHECK-NEXT:  .Lfunc_gep0:
-; CHECK-NEXT:    addis r2, r12, .TOC.-.Lfunc_gep0@ha
-; CHECK-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep0@l
-; CHECK-NEXT:  .Lfunc_lep0:
-; CHECK-NEXT:    .localentry test, .Lfunc_lep0-.Lfunc_gep0
-; CHECK-NEXT:  # %bb.0: # %bb
+; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    li r4, 0

diff  --git a/llvm/test/CodeGen/PowerPC/pr43527.ll b/llvm/test/CodeGen/PowerPC/pr43527.ll
index 577d09ad99994..f2836b239ffe5 100644
--- a/llvm/test/CodeGen/PowerPC/pr43527.ll
+++ b/llvm/test/CodeGen/PowerPC/pr43527.ll
@@ -16,10 +16,10 @@ define dso_local void @test(i64 %arg, i64 %arg1) {
 ; CHECK-NEXT:    .cfi_offset r30, -16
 ; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -64(r1)
 ; CHECK-NEXT:    sub r30, r4, r3
 ; CHECK-NEXT:    li r29, -4
+; CHECK-NEXT:    std r0, 80(r1)
 ; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB0_3: # %bb5
 ; CHECK-NEXT:    #

diff  --git a/llvm/test/CodeGen/PowerPC/pr43976.ll b/llvm/test/CodeGen/PowerPC/pr43976.ll
index 4dfb9343385bf..5c29a15083158 100644
--- a/llvm/test/CodeGen/PowerPC/pr43976.ll
+++ b/llvm/test/CodeGen/PowerPC/pr43976.ll
@@ -7,8 +7,8 @@ define dso_local signext i32 @b() local_unnamed_addr #0 {
 ; CHECK-LABEL: b:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -144(r1)
+; CHECK-NEXT:    std r0, 160(r1)
 ; CHECK-NEXT:    addis r3, r2, a@toc@ha
 ; CHECK-NEXT:    li r4, 1
 ; CHECK-NEXT:    lfd f0, a@toc@l(r3)

diff  --git a/llvm/test/CodeGen/PowerPC/pr44183.ll b/llvm/test/CodeGen/PowerPC/pr44183.ll
index befa20b7f5527..e3dca13809035 100644
--- a/llvm/test/CodeGen/PowerPC/pr44183.ll
+++ b/llvm/test/CodeGen/PowerPC/pr44183.ll
@@ -11,14 +11,14 @@ define void @_ZN1m1nEv(ptr %this) local_unnamed_addr nounwind align 2 {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    mr r30, r3
 ; CHECK-NEXT:    ld r3, 8(r3)
 ; CHECK-NEXT:    lwz r4, 36(r30)
 ; CHECK-NEXT:    rldicl r3, r3, 60, 4
-; CHECK-NEXT:    slwi r3, r3, 31
 ; CHECK-NEXT:    clrlwi r4, r4, 31
+; CHECK-NEXT:    slwi r3, r3, 31
 ; CHECK-NEXT:    rlwimi r4, r3, 0, 0, 0
 ; CHECK-NEXT:    bl _ZN1llsE1d
 ; CHECK-NEXT:    nop

diff  --git a/llvm/test/CodeGen/PowerPC/pr45301.ll b/llvm/test/CodeGen/PowerPC/pr45301.ll
index 6259716fc4802..fbd042f6f8d81 100644
--- a/llvm/test/CodeGen/PowerPC/pr45301.ll
+++ b/llvm/test/CodeGen/PowerPC/pr45301.ll
@@ -7,8 +7,8 @@ define dso_local void @g(ptr %agg.result) local_unnamed_addr #0 {
 ; CHECK-LABEL: g:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -112(r1)
+; CHECK-NEXT:    std r0, 128(r1)
 ; CHECK-NEXT:    bl i
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    addis r4, r2, g@toc@ha

diff  --git a/llvm/test/CodeGen/PowerPC/pr45432.ll b/llvm/test/CodeGen/PowerPC/pr45432.ll
index a82b2b6247785..83b5390b6bf14 100644
--- a/llvm/test/CodeGen/PowerPC/pr45432.ll
+++ b/llvm/test/CodeGen/PowerPC/pr45432.ll
@@ -11,9 +11,9 @@ define dso_local void @h() local_unnamed_addr #0 {
 ; CHECK-LABEL: h:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -64(1)
 ; CHECK-NEXT:    addis 3, 2, g@toc@ha
+; CHECK-NEXT:    std 0, 80(1)
 ; CHECK-NEXT:    std 30, 48(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    lwz 3, g@toc@l(3)
 ; CHECK-NEXT:    extswsli 30, 3, 2

diff  --git a/llvm/test/CodeGen/PowerPC/pr47373.ll b/llvm/test/CodeGen/PowerPC/pr47373.ll
index e64adcbbc1e85..73c863427f61b 100644
--- a/llvm/test/CodeGen/PowerPC/pr47373.ll
+++ b/llvm/test/CodeGen/PowerPC/pr47373.ll
@@ -8,11 +8,11 @@ define void @d() local_unnamed_addr #0 {
 ; CHECK-LABEL: d:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -208(r1)
 ; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
-; CHECK-NEXT:    std r29, 184(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 224(r1)
 ; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    std r29, 184(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, 192(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    ld r29, 0(r3)
 ; CHECK-NEXT:    bl c

diff  --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll
index e846619c11a69..493ca2ac79862 100644
--- a/llvm/test/CodeGen/PowerPC/pr48519.ll
+++ b/llvm/test/CodeGen/PowerPC/pr48519.ll
@@ -10,10 +10,10 @@ define void @julia__typed_vcat_20() #0 {
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -48(r1)
 ; CHECK-NEXT:    li r3, 1
 ; CHECK-NEXT:    li r30, 0
+; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_1: # %bb3
 ; CHECK-NEXT:    #
@@ -83,9 +83,9 @@ define void @julia__hypot_17() #0 {
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -48(r1)
 ; CHECK-NEXT:    li r30, 3
+; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB1_1: # %bb1
 ; CHECK-NEXT:    #
@@ -148,12 +148,12 @@ define void @func_48786() #0 {
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    mfocrf r12, 32
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stw r12, 8(r1)
 ; CHECK-NEXT:    stdu r1, -48(r1)
-; CHECK-NEXT:    ld r3, 0(r3)
+; CHECK-NEXT:    std r0, 64(r1)
 ; CHECK-NEXT:    std r30, 32(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    # implicit-def: $x30
+; CHECK-NEXT:    ld r3, 0(r3)
 ; CHECK-NEXT:    cmpdi r3, 0
 ; CHECK-NEXT:    crnot 4*cr2+lt, eq
 ; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB2_3
@@ -261,11 +261,11 @@ define void @func_48785(half %arg) #0 {
 ; CHECK-NEXT:    std r29, -32(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, -24(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -64(r1)
 ; CHECK-NEXT:    fmr f31, f1
 ; CHECK-NEXT:    li r30, 0
 ; CHECK-NEXT:    li r29, 0
+; CHECK-NEXT:    std r0, 80(r1)
 ; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_1: # %bb1
 ; CHECK-NEXT:    #

diff  --git a/llvm/test/CodeGen/PowerPC/pr48527.ll b/llvm/test/CodeGen/PowerPC/pr48527.ll
index d093631250812..262f7a9805fdc 100644
--- a/llvm/test/CodeGen/PowerPC/pr48527.ll
+++ b/llvm/test/CodeGen/PowerPC/pr48527.ll
@@ -13,8 +13,8 @@ define void @_ZNK1q1rEv() local_unnamed_addr #0 align 2 {
 ; CHECK-NEXT:    mflr 0
 ; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -64(1)
+; CHECK-NEXT:    std 0, 80(1)
 ; CHECK-NEXT:    lwz 30, 0(3)
 ; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
 ; CHECK-NEXT:    ld 29, .LC0@toc@l(3)

diff  --git a/llvm/test/CodeGen/PowerPC/pr49092.ll b/llvm/test/CodeGen/PowerPC/pr49092.ll
index 2fce584185157..ea84c77603d08 100644
--- a/llvm/test/CodeGen/PowerPC/pr49092.ll
+++ b/llvm/test/CodeGen/PowerPC/pr49092.ll
@@ -9,9 +9,9 @@ define dso_local half @test2(i64 %a, i64 %b) local_unnamed_addr #0 {
 ; CHECK-LABEL: test2:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
 ; CHECK-NEXT:    add r3, r4, r3
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    addi r3, r3, 11
 ; CHECK-NEXT:    clrlwi r3, r3, 16
 ; CHECK-NEXT:    bl __gnu_h2f_ieee

diff  --git a/llvm/test/CodeGen/PowerPC/pr55463.ll b/llvm/test/CodeGen/PowerPC/pr55463.ll
index 17767798316c5..0e2f261e4b7ee 100644
--- a/llvm/test/CodeGen/PowerPC/pr55463.ll
+++ b/llvm/test/CodeGen/PowerPC/pr55463.ll
@@ -5,9 +5,9 @@ define void @baz() #0 {
 ; CHECK-LABEL: baz:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -16(1)
 ; CHECK-NEXT:    # implicit-def: $r3
+; CHECK-NEXT:    stw 0, 20(1)
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_1: # %bb1
 ; CHECK-NEXT:    #
@@ -42,12 +42,12 @@ define void @wombat() #0 {
 ; CHECK-LABEL: wombat:
 ; CHECK:       # %bb.0: # %bb
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -48(1)
 ; CHECK-NEXT:    li 3, .LCPI1_0@l
 ; CHECK-NEXT:    li 5, .LCPI1_1@l
 ; CHECK-NEXT:    lis 4, .LCPI1_0@ha
 ; CHECK-NEXT:    lis 6, .LCPI1_1@ha
+; CHECK-NEXT:    stw 0, 52(1)
 ; CHECK-NEXT:    evstdd 29, 24(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    evstdd 30, 32(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    evlddx 30, 4, 3

diff  --git a/llvm/test/CodeGen/PowerPC/pr56469.ll b/llvm/test/CodeGen/PowerPC/pr56469.ll
index f7025fc4e8d62..69722f479e916 100644
--- a/llvm/test/CodeGen/PowerPC/pr56469.ll
+++ b/llvm/test/CodeGen/PowerPC/pr56469.ll
@@ -7,7 +7,6 @@ define void @callee(float  %a, float  %b, float  %c, float  %d, float  %e, float
 ; CHECK-LABEL: callee:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -16(1)
 ; CHECK-NEXT:    lfs 1, 24(1)
 ; CHECK-NEXT:    lis 3, .L.str@ha
@@ -16,6 +15,7 @@ define void @callee(float  %a, float  %b, float  %c, float  %d, float  %e, float
 ; CHECK-NEXT:    lfs 3, 32(1)
 ; CHECK-NEXT:    creqv 6, 6, 6
 ; CHECK-NEXT:    lfs 4, 36(1)
+; CHECK-NEXT:    stw 0, 20(1)
 ; CHECK-NEXT:    bl printf
 ; CHECK-NEXT:    lwz 0, 20(1)
 ; CHECK-NEXT:    addi 1, 1, 16

diff  --git a/llvm/test/CodeGen/PowerPC/read-set-flm.ll b/llvm/test/CodeGen/PowerPC/read-set-flm.ll
index 090f1dc8fd60f..981dcb25ef96a 100644
--- a/llvm/test/CodeGen/PowerPC/read-set-flm.ll
+++ b/llvm/test/CodeGen/PowerPC/read-set-flm.ll
@@ -84,8 +84,8 @@ define void @cse_nomerge(ptr %f1, ptr %f2, double %f3) #0 {
 ; CHECK-NEXT:    .cfi_offset f31, -8
 ; CHECK-NEXT:    std 30, -24(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -64(1)
+; CHECK-NEXT:    std 0, 80(1)
 ; CHECK-NEXT:    fmr 31, 1
 ; CHECK-NEXT:    mr 30, 4
 ; CHECK-NEXT:    mffs 0
@@ -121,8 +121,8 @@ define void @cse_nomerge_readonly(ptr %f1, ptr %f2, double %f3) #0 {
 ; CHECK-NEXT:    .cfi_offset f31, -8
 ; CHECK-NEXT:    std 30, -24(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -64(1)
+; CHECK-NEXT:    std 0, 80(1)
 ; CHECK-NEXT:    fmr 31, 1
 ; CHECK-NEXT:    mr 30, 4
 ; CHECK-NEXT:    mffs 0

diff  --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll
index 257e9f4230833..8752d4be71142 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -1239,8 +1239,8 @@ define fp128 @hoo5_fmf(fp128 %a) #1 {
 ; CHECK-P7-LABEL: hoo5_fmf:
 ; CHECK-P7:       # %bb.0:
 ; CHECK-P7-NEXT:    mflr 0
-; CHECK-P7-NEXT:    std 0, 16(1)
 ; CHECK-P7-NEXT:    stdu 1, -112(1)
+; CHECK-P7-NEXT:    std 0, 128(1)
 ; CHECK-P7-NEXT:    bl sqrtf128
 ; CHECK-P7-NEXT:    nop
 ; CHECK-P7-NEXT:    addi 1, 1, 112
@@ -1251,8 +1251,8 @@ define fp128 @hoo5_fmf(fp128 %a) #1 {
 ; CHECK-P8-LABEL: hoo5_fmf:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    mflr 0
-; CHECK-P8-NEXT:    std 0, 16(1)
 ; CHECK-P8-NEXT:    stdu 1, -32(1)
+; CHECK-P8-NEXT:    std 0, 48(1)
 ; CHECK-P8-NEXT:    bl sqrtf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi 1, 1, 32
@@ -1272,8 +1272,8 @@ define fp128 @hoo5_safe(fp128 %a) #1 {
 ; CHECK-P7-LABEL: hoo5_safe:
 ; CHECK-P7:       # %bb.0:
 ; CHECK-P7-NEXT:    mflr 0
-; CHECK-P7-NEXT:    std 0, 16(1)
 ; CHECK-P7-NEXT:    stdu 1, -112(1)
+; CHECK-P7-NEXT:    std 0, 128(1)
 ; CHECK-P7-NEXT:    bl sqrtf128
 ; CHECK-P7-NEXT:    nop
 ; CHECK-P7-NEXT:    addi 1, 1, 112
@@ -1284,8 +1284,8 @@ define fp128 @hoo5_safe(fp128 %a) #1 {
 ; CHECK-P8-LABEL: hoo5_safe:
 ; CHECK-P8:       # %bb.0:
 ; CHECK-P8-NEXT:    mflr 0
-; CHECK-P8-NEXT:    std 0, 16(1)
 ; CHECK-P8-NEXT:    stdu 1, -32(1)
+; CHECK-P8-NEXT:    std 0, 48(1)
 ; CHECK-P8-NEXT:    bl sqrtf128
 ; CHECK-P8-NEXT:    nop
 ; CHECK-P8-NEXT:    addi 1, 1, 32

diff  --git a/llvm/test/CodeGen/PowerPC/reg-scavenging.ll b/llvm/test/CodeGen/PowerPC/reg-scavenging.ll
index 75cf6cc4beba9..265c9e4206401 100644
--- a/llvm/test/CodeGen/PowerPC/reg-scavenging.ll
+++ b/llvm/test/CodeGen/PowerPC/reg-scavenging.ll
@@ -6,8 +6,8 @@ define dso_local signext i32 @caller(i32 signext %a, i32 signext %b) local_unnam
 ; CHECK-LABEL: caller:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -240(r1)
+; CHECK-NEXT:    std r0, 256(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 240
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    .cfi_offset v20, -192

diff  --git a/llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.ll b/llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.ll
index 7ac15408d63a2..0be72d9fc5954 100644
--- a/llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.ll
+++ b/llvm/test/CodeGen/PowerPC/remove-redundant-load-imm.ll
@@ -12,12 +12,12 @@ declare void @barney.94(ptr, i32)
 define void @redundancy_on_ppc_only(i1 %arg7) nounwind {
 ; PPC64LE-LABEL: redundancy_on_ppc_only:
 ; PPC64LE:       # %bb.0: # %bb
-; PPC64LE-NEXT:    mflr 0
 ; PPC64LE-NEXT:    andi. 3, 3, 1
-; PPC64LE-NEXT:    std 0, 16(1)
+; PPC64LE-NEXT:    mflr 0
 ; PPC64LE-NEXT:    stdu 1, -32(1)
 ; PPC64LE-NEXT:    li 3, 1
 ; PPC64LE-NEXT:    li 4, 0
+; PPC64LE-NEXT:    std 0, 48(1)
 ; PPC64LE-NEXT:    iselgt 3, 3, 4
 ; PPC64LE-NEXT:    bl barney.88
 ; PPC64LE-NEXT:    nop
@@ -37,10 +37,10 @@ define void @redundancy_on_ppc_and_other_targets() nounwind {
 ; PPC64LE-LABEL: redundancy_on_ppc_and_other_targets:
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    mflr 0
-; PPC64LE-NEXT:    std 0, 16(1)
 ; PPC64LE-NEXT:    stdu 1, -32(1)
 ; PPC64LE-NEXT:    addis 3, 2, .LC0@toc@ha
 ; PPC64LE-NEXT:    li 4, 0
+; PPC64LE-NEXT:    std 0, 48(1)
 ; PPC64LE-NEXT:    ld 3, .LC0@toc@l(3)
 ; PPC64LE-NEXT:    std 4, 0(3)
 ; PPC64LE-NEXT:    bl barney.94

diff  --git a/llvm/test/CodeGen/PowerPC/retaddr.ll b/llvm/test/CodeGen/PowerPC/retaddr.ll
index 6dd896a5f0dc3..8d78dd0bef67e 100644
--- a/llvm/test/CodeGen/PowerPC/retaddr.ll
+++ b/llvm/test/CodeGen/PowerPC/retaddr.ll
@@ -3,8 +3,8 @@
 
 ; PPC32: foo
 ; PPC32: mflr 0
-; PPC32: stw 0, 4(1)
 ; PPC32: stwu 1, -[[STACK:[0-9]+]](1)
+; PPC32: stw 0, 20(1)
 ; PPC32: lwz [[REG:[0-9]+]], [[RETADDR:[0-9]+]](1)
 ; PPC32: stw [[REG]], 0(3)
 ; PPC32: lwz 0, [[RETADDR]](1)
@@ -14,12 +14,12 @@
 
 ; PPC64: foo
 ; PPC64: mflr 0
-; PPC64: std 0, [[RETADDR:[0-9]+]]
-; PPC64: stdu 1, -[[STACK:[0-9]+]]
+; PPC64: stdu 1, -[[#%d,STACK:]]
+; PPC64: std 0, [[#%d,RETADDR:]]
 ; PPC64: ld [[REG:[0-9]+]]
 ; PPC64: std [[REG]], 0(3)
-; PPC64: addi 1, 1, [[STACK]]
-; PPC64: ld 0, [[RETADDR]]
+; PPC64: addi 1, 1, [[#%d,STACK]]
+; PPC64: ld 0, [[#%d,RETADDR-STACK]]
 ; PPC64: mtlr 0
 ; PPC64: blr
 

diff  --git a/llvm/test/CodeGen/PowerPC/retaddr2.ll b/llvm/test/CodeGen/PowerPC/retaddr2.ll
index 9ac082e99b772..7b90903001fdf 100644
--- a/llvm/test/CodeGen/PowerPC/retaddr2.ll
+++ b/llvm/test/CodeGen/PowerPC/retaddr2.ll
@@ -11,7 +11,7 @@ entry:
 
 ; CHECK-LABEL: @test1
 ; CHECK: mflr {{[0-9]+}}
-; CHECK: std 0, 16(1)
+; CHECK: std 0, 64(1)
 ; CHECK-DAG: ld 3, 64(1)
 ; CHECK-DAG: ld [[SR:[0-9]+]], 16(1)
 ; CHECK: mtlr [[SR]]

diff  --git a/llvm/test/CodeGen/PowerPC/retaddr_multi_levels.ll b/llvm/test/CodeGen/PowerPC/retaddr_multi_levels.ll
index 8cda3600beff6..c0a5047c48af8 100644
--- a/llvm/test/CodeGen/PowerPC/retaddr_multi_levels.ll
+++ b/llvm/test/CodeGen/PowerPC/retaddr_multi_levels.ll
@@ -14,8 +14,8 @@ define ptr @test0() nounwind readnone {
 ; CHECK-64B-LE-LABEL: test0:
 ; CHECK-64B-LE:       # %bb.0: # %entry
 ; CHECK-64B-LE-NEXT:    mflr 0
-; CHECK-64B-LE-NEXT:    std 0, 16(1)
 ; CHECK-64B-LE-NEXT:    stdu 1, -32(1)
+; CHECK-64B-LE-NEXT:    std 0, 48(1)
 ; CHECK-64B-LE-NEXT:    ld 3, 48(1)
 ; CHECK-64B-LE-NEXT:    addi 1, 1, 32
 ; CHECK-64B-LE-NEXT:    ld 0, 16(1)
@@ -25,8 +25,8 @@ define ptr @test0() nounwind readnone {
 ; CHECK-64B-BE-LABEL: test0:
 ; CHECK-64B-BE:       # %bb.0: # %entry
 ; CHECK-64B-BE-NEXT:    mflr 0
-; CHECK-64B-BE-NEXT:    std 0, 16(1)
 ; CHECK-64B-BE-NEXT:    stdu 1, -48(1)
+; CHECK-64B-BE-NEXT:    std 0, 64(1)
 ; CHECK-64B-BE-NEXT:    ld 3, 64(1)
 ; CHECK-64B-BE-NEXT:    addi 1, 1, 48
 ; CHECK-64B-BE-NEXT:    ld 0, 16(1)
@@ -36,8 +36,8 @@ define ptr @test0() nounwind readnone {
 ; CHECK-32B-BE-LABEL: test0:
 ; CHECK-32B-BE:       # %bb.0: # %entry
 ; CHECK-32B-BE-NEXT:    mflr 0
-; CHECK-32B-BE-NEXT:    stw 0, 8(1)
 ; CHECK-32B-BE-NEXT:    stwu 1, -32(1)
+; CHECK-32B-BE-NEXT:    stw 0, 40(1)
 ; CHECK-32B-BE-NEXT:    lwz 3, 40(1)
 ; CHECK-32B-BE-NEXT:    addi 1, 1, 32
 ; CHECK-32B-BE-NEXT:    lwz 0, 8(1)
@@ -52,8 +52,8 @@ define ptr @test1() nounwind readnone {
 ; CHECK-64B-LE-LABEL: test1:
 ; CHECK-64B-LE:       # %bb.0: # %entry
 ; CHECK-64B-LE-NEXT:    mflr 0
-; CHECK-64B-LE-NEXT:    std 0, 16(1)
 ; CHECK-64B-LE-NEXT:    stdu 1, -32(1)
+; CHECK-64B-LE-NEXT:    std 0, 48(1)
 ; CHECK-64B-LE-NEXT:    ld 3, 0(1)
 ; CHECK-64B-LE-NEXT:    ld 3, 0(3)
 ; CHECK-64B-LE-NEXT:    ld 3, 16(3)
@@ -65,8 +65,8 @@ define ptr @test1() nounwind readnone {
 ; CHECK-64B-BE-LABEL: test1:
 ; CHECK-64B-BE:       # %bb.0: # %entry
 ; CHECK-64B-BE-NEXT:    mflr 0
-; CHECK-64B-BE-NEXT:    std 0, 16(1)
 ; CHECK-64B-BE-NEXT:    stdu 1, -48(1)
+; CHECK-64B-BE-NEXT:    std 0, 64(1)
 ; CHECK-64B-BE-NEXT:    ld 3, 0(1)
 ; CHECK-64B-BE-NEXT:    ld 3, 0(3)
 ; CHECK-64B-BE-NEXT:    ld 3, 16(3)
@@ -78,8 +78,8 @@ define ptr @test1() nounwind readnone {
 ; CHECK-32B-BE-LABEL: test1:
 ; CHECK-32B-BE:       # %bb.0: # %entry
 ; CHECK-32B-BE-NEXT:    mflr 0
-; CHECK-32B-BE-NEXT:    stw 0, 8(1)
 ; CHECK-32B-BE-NEXT:    stwu 1, -32(1)
+; CHECK-32B-BE-NEXT:    stw 0, 40(1)
 ; CHECK-32B-BE-NEXT:    lwz 3, 0(1)
 ; CHECK-32B-BE-NEXT:    lwz 3, 0(3)
 ; CHECK-32B-BE-NEXT:    lwz 3, 8(3)
@@ -96,8 +96,8 @@ define ptr @test2() nounwind readnone {
 ; CHECK-64B-LE-LABEL: test2:
 ; CHECK-64B-LE:       # %bb.0: # %entry
 ; CHECK-64B-LE-NEXT:    mflr 0
-; CHECK-64B-LE-NEXT:    std 0, 16(1)
 ; CHECK-64B-LE-NEXT:    stdu 1, -32(1)
+; CHECK-64B-LE-NEXT:    std 0, 48(1)
 ; CHECK-64B-LE-NEXT:    ld 3, 0(1)
 ; CHECK-64B-LE-NEXT:    ld 3, 0(3)
 ; CHECK-64B-LE-NEXT:    ld 3, 0(3)
@@ -110,8 +110,8 @@ define ptr @test2() nounwind readnone {
 ; CHECK-64B-BE-LABEL: test2:
 ; CHECK-64B-BE:       # %bb.0: # %entry
 ; CHECK-64B-BE-NEXT:    mflr 0
-; CHECK-64B-BE-NEXT:    std 0, 16(1)
 ; CHECK-64B-BE-NEXT:    stdu 1, -48(1)
+; CHECK-64B-BE-NEXT:    std 0, 64(1)
 ; CHECK-64B-BE-NEXT:    ld 3, 0(1)
 ; CHECK-64B-BE-NEXT:    ld 3, 0(3)
 ; CHECK-64B-BE-NEXT:    ld 3, 0(3)
@@ -124,8 +124,8 @@ define ptr @test2() nounwind readnone {
 ; CHECK-32B-BE-LABEL: test2:
 ; CHECK-32B-BE:       # %bb.0: # %entry
 ; CHECK-32B-BE-NEXT:    mflr 0
-; CHECK-32B-BE-NEXT:    stw 0, 8(1)
 ; CHECK-32B-BE-NEXT:    stwu 1, -32(1)
+; CHECK-32B-BE-NEXT:    stw 0, 40(1)
 ; CHECK-32B-BE-NEXT:    lwz 3, 0(1)
 ; CHECK-32B-BE-NEXT:    lwz 3, 0(3)
 ; CHECK-32B-BE-NEXT:    lwz 3, 0(3)

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
index bbb7e5719d8ac..e950c0a2efac4 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
@@ -12,8 +12,8 @@ define dso_local i64 @test_lrint(double %d) local_unnamed_addr {
 ; BE-LABEL: test_lrint:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    std r0, 16(r1)
 ; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    .cfi_def_cfa_offset 112
 ; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl lrint
@@ -26,8 +26,8 @@ define dso_local i64 @test_lrint(double %d) local_unnamed_addr {
 ; CHECK-LABEL: test_lrint:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl lrint
@@ -53,8 +53,8 @@ define dso_local i64 @test_lrintf(float %f) local_unnamed_addr {
 ; BE-LABEL: test_lrintf:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    std r0, 16(r1)
 ; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    .cfi_def_cfa_offset 112
 ; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl lrintf
@@ -67,8 +67,8 @@ define dso_local i64 @test_lrintf(float %f) local_unnamed_addr {
 ; CHECK-LABEL: test_lrintf:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl lrintf
@@ -94,8 +94,8 @@ define dso_local i64 @test_llrint(double %d) local_unnamed_addr {
 ; BE-LABEL: test_llrint:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    std r0, 16(r1)
 ; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    .cfi_def_cfa_offset 112
 ; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl llrint
@@ -108,8 +108,8 @@ define dso_local i64 @test_llrint(double %d) local_unnamed_addr {
 ; CHECK-LABEL: test_llrint:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl llrint
@@ -135,8 +135,8 @@ define dso_local i64 @test_llrintf(float %f) local_unnamed_addr {
 ; BE-LABEL: test_llrintf:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    std r0, 16(r1)
 ; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    .cfi_def_cfa_offset 112
 ; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl llrintf
@@ -149,8 +149,8 @@ define dso_local i64 @test_llrintf(float %f) local_unnamed_addr {
 ; CHECK-LABEL: test_llrintf:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl llrintf
@@ -176,8 +176,8 @@ define dso_local i64 @test_lround(double %d) local_unnamed_addr {
 ; BE-LABEL: test_lround:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    std r0, 16(r1)
 ; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    .cfi_def_cfa_offset 112
 ; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl lround
@@ -190,8 +190,8 @@ define dso_local i64 @test_lround(double %d) local_unnamed_addr {
 ; CHECK-LABEL: test_lround:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl lround
@@ -218,8 +218,8 @@ define dso_local i64 @test_lroundf(float %f) local_unnamed_addr {
 ; BE-LABEL: test_lroundf:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    std r0, 16(r1)
 ; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    .cfi_def_cfa_offset 112
 ; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl lroundf
@@ -232,8 +232,8 @@ define dso_local i64 @test_lroundf(float %f) local_unnamed_addr {
 ; CHECK-LABEL: test_lroundf:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl lroundf
@@ -260,8 +260,8 @@ define dso_local i64 @test_llround(double %d) local_unnamed_addr {
 ; BE-LABEL: test_llround:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    std r0, 16(r1)
 ; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    .cfi_def_cfa_offset 112
 ; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl llround
@@ -274,8 +274,8 @@ define dso_local i64 @test_llround(double %d) local_unnamed_addr {
 ; CHECK-LABEL: test_llround:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl llround
@@ -302,8 +302,8 @@ define dso_local i64 @test_llroundf(float %f) local_unnamed_addr {
 ; BE-LABEL: test_llroundf:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    std r0, 16(r1)
 ; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    .cfi_def_cfa_offset 112
 ; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl llroundf
@@ -316,8 +316,8 @@ define dso_local i64 @test_llroundf(float %f) local_unnamed_addr {
 ; CHECK-LABEL: test_llroundf:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl llroundf
@@ -344,8 +344,8 @@ define dso_local double @test_nearbyint(double %d) local_unnamed_addr {
 ; BE-LABEL: test_nearbyint:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    std r0, 16(r1)
 ; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    .cfi_def_cfa_offset 112
 ; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl nearbyint
@@ -358,8 +358,8 @@ define dso_local double @test_nearbyint(double %d) local_unnamed_addr {
 ; CHECK-LABEL: test_nearbyint:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl nearbyint
@@ -384,8 +384,8 @@ define dso_local float @test_nearbyintf(float %f) local_unnamed_addr {
 ; BE-LABEL: test_nearbyintf:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    std r0, 16(r1)
 ; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    .cfi_def_cfa_offset 112
 ; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl nearbyintf
@@ -398,8 +398,8 @@ define dso_local float @test_nearbyintf(float %f) local_unnamed_addr {
 ; CHECK-LABEL: test_nearbyintf:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl nearbyintf

diff  --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
index 68c3c1f757bd7..5b8742b6f61af 100644
--- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
@@ -8,8 +8,8 @@ define void @print_res() nounwind {
 ; CHECK-LABEL: print_res:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -128(1)
+; CHECK-NEXT:    std 0, 144(1)
 ; CHECK-NEXT:    lwz 3, 0(3)
 ; CHECK-NEXT:    addi 3, 3, -1
 ; CHECK-NEXT:    clrldi 4, 3, 32

diff  --git a/llvm/test/CodeGen/PowerPC/sms-phi-1.ll b/llvm/test/CodeGen/PowerPC/sms-phi-1.ll
index 61dd310812bac..782edba77c0c5 100644
--- a/llvm/test/CodeGen/PowerPC/sms-phi-1.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-phi-1.ll
@@ -7,8 +7,8 @@ define void @main() nounwind #0 {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -48(1)
+; CHECK-NEXT:    std 0, 64(1)
 ; CHECK-NEXT:    bl strtol
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mr 30, 3

diff  --git a/llvm/test/CodeGen/PowerPC/sms-phi-3.ll b/llvm/test/CodeGen/PowerPC/sms-phi-3.ll
index 39e368a4611c1..4cd60c69da30e 100644
--- a/llvm/test/CodeGen/PowerPC/sms-phi-3.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-phi-3.ll
@@ -11,8 +11,8 @@ define void @phi3(i32*) nounwind {
 ; CHECK-NEXT:    mflr 0
 ; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stdu 1, -64(1)
+; CHECK-NEXT:    std 0, 80(1)
 ; CHECK-NEXT:    mr 30, 3
 ; CHECK-NEXT:    bl malloc
 ; CHECK-NEXT:    nop
@@ -38,7 +38,7 @@ define void @phi3(i32*) nounwind {
 ; CHECK-NEXT:    lwzu 8, 4(7)
 ; CHECK-NEXT:    bdz .LBB0_4
 ; CHECK-NEXT:    .p2align 5
-; CHECK-NEXT:  .LBB0_3: #
+; CHECK-NEXT:  .LBB0_3:
 ; CHECK-NEXT:    add 9, 3, 6
 ; CHECK-NEXT:    extswsli 6, 5, 5
 ; CHECK-NEXT:    add 5, 8, 5

diff  --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll
index 7836aa8772549..c8fbda71297c7 100644
--- a/llvm/test/CodeGen/PowerPC/spe.ll
+++ b/llvm/test/CodeGen/PowerPC/spe.ll
@@ -547,8 +547,8 @@ define float @test_dtos(double %a) #0 {
 ; EFPU2-LABEL: test_dtos:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __truncdfsf2
 ; EFPU2-NEXT:    lwz 0, 20(1)
 ; EFPU2-NEXT:    addi 1, 1, 16
@@ -619,8 +619,8 @@ define double @test_ddiv(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_ddiv:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __divdf3
 ; EFPU2-NEXT:    lwz 0, 20(1)
 ; EFPU2-NEXT:    addi 1, 1, 16
@@ -646,8 +646,8 @@ define double @test_dmul(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dmul:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __muldf3
 ; EFPU2-NEXT:    lwz 0, 20(1)
 ; EFPU2-NEXT:    addi 1, 1, 16
@@ -672,8 +672,8 @@ define double @test_dadd(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dadd:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __adddf3
 ; EFPU2-NEXT:    lwz 0, 20(1)
 ; EFPU2-NEXT:    addi 1, 1, 16
@@ -698,8 +698,8 @@ define double @test_dsub(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dsub:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __subdf3
 ; EFPU2-NEXT:    lwz 0, 20(1)
 ; EFPU2-NEXT:    addi 1, 1, 16
@@ -741,8 +741,8 @@ define double @test_stod(float %a) #0 {
 ; EFPU2-LABEL: test_stod:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __extendsfdf2
 ; EFPU2-NEXT:    lwz 0, 20(1)
 ; EFPU2-NEXT:    addi 1, 1, 16
@@ -774,8 +774,8 @@ define i1 @test_dcmpuno(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dcmpuno:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __unorddf2
 ; EFPU2-NEXT:    cntlzw 3, 3
 ; EFPU2-NEXT:    not 3, 3
@@ -809,8 +809,8 @@ define i1 @test_dcmpord(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dcmpord:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __unorddf2
 ; EFPU2-NEXT:    cntlzw 3, 3
 ; EFPU2-NEXT:    rlwinm 3, 3, 27, 31, 31
@@ -845,8 +845,8 @@ define i32 @test_dcmpgt(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dcmpgt:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __gtdf2
 ; EFPU2-NEXT:    cmpwi 3, 0
 ; EFPU2-NEXT:    ble 0, .LBB37_2
@@ -905,8 +905,8 @@ define i32 @test_dcmpugt(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dcmpugt:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __ledf2
 ; EFPU2-NEXT:    cmpwi 3, 0
 ; EFPU2-NEXT:    ble 0, .LBB38_2
@@ -959,8 +959,8 @@ define i32 @test_dcmple(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dcmple:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __gtdf2
 ; EFPU2-NEXT:    cmpwi 3, 0
 ; EFPU2-NEXT:    bgt 0, .LBB39_2
@@ -1013,8 +1013,8 @@ define i32 @test_dcmpule(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dcmpule:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __gtdf2
 ; EFPU2-NEXT:    cmpwi 3, 0
 ; EFPU2-NEXT:    bgt 0, .LBB40_2
@@ -1068,8 +1068,8 @@ define i32 @test_dcmpeq(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dcmpeq:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __nedf2
 ; EFPU2-NEXT:    cmplwi 3, 0
 ; EFPU2-NEXT:    bne 0, .LBB41_2
@@ -1125,9 +1125,9 @@ define i32 @test_dcmpueq(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dcmpueq:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -80(1)
 ; EFPU2-NEXT:    mfcr 12
+; EFPU2-NEXT:    stw 0, 84(1)
 ; EFPU2-NEXT:    stw 12, 76(1)
 ; EFPU2-NEXT:    evstdd 27, 24(1) # 8-byte Folded Spill
 ; EFPU2-NEXT:    mr 27, 3
@@ -1201,9 +1201,9 @@ define i1 @test_dcmpne(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dcmpne:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -80(1)
 ; EFPU2-NEXT:    mfcr 12
+; EFPU2-NEXT:    stw 0, 84(1)
 ; EFPU2-NEXT:    stw 12, 76(1)
 ; EFPU2-NEXT:    evstdd 27, 24(1) # 8-byte Folded Spill
 ; EFPU2-NEXT:    mr 27, 3
@@ -1267,8 +1267,8 @@ define i32 @test_dcmpune(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dcmpune:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __eqdf2
 ; EFPU2-NEXT:    cmplwi 3, 0
 ; EFPU2-NEXT:    beq 0, .LBB44_2
@@ -1321,8 +1321,8 @@ define i32 @test_dcmplt(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dcmplt:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __ltdf2
 ; EFPU2-NEXT:    cmpwi 3, 0
 ; EFPU2-NEXT:    bge 0, .LBB45_2
@@ -1381,8 +1381,8 @@ define i32 @test_dcmpult(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dcmpult:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __gedf2
 ; EFPU2-NEXT:    cmpwi 3, 0
 ; EFPU2-NEXT:    bge 0, .LBB46_2
@@ -1435,8 +1435,8 @@ define i1 @test_dcmpge(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dcmpge:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __gedf2
 ; EFPU2-NEXT:    not 3, 3
 ; EFPU2-NEXT:    srwi 3, 3, 31
@@ -1471,8 +1471,8 @@ define i32 @test_dcmpuge(double %a, double %b) #0 {
 ; EFPU2-LABEL: test_dcmpuge:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __ltdf2
 ; EFPU2-NEXT:    cmpwi 3, 0
 ; EFPU2-NEXT:    blt 0, .LBB48_2
@@ -1541,8 +1541,8 @@ define i32 @test_dtoui(double %a) #0 {
 ; EFPU2-LABEL: test_dtoui:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __fixunsdfsi
 ; EFPU2-NEXT:    lwz 0, 20(1)
 ; EFPU2-NEXT:    addi 1, 1, 16
@@ -1563,8 +1563,8 @@ define i32 @test_dtosi(double %a) #0 {
 ; EFPU2-LABEL: test_dtosi:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __fixdfsi
 ; EFPU2-NEXT:    lwz 0, 20(1)
 ; EFPU2-NEXT:    addi 1, 1, 16
@@ -1587,8 +1587,8 @@ define double @test_dfromui(i32 %a) #0 {
 ; EFPU2-LABEL: test_dfromui:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __floatunsidf
 ; EFPU2-NEXT:    lwz 0, 20(1)
 ; EFPU2-NEXT:    addi 1, 1, 16
@@ -1611,8 +1611,8 @@ define double @test_dfromsi(i32 %a) #0 {
 ; EFPU2-LABEL: test_dfromsi:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    stw 0, 20(1)
 ; EFPU2-NEXT:    bl __floatsidf
 ; EFPU2-NEXT:    lwz 0, 20(1)
 ; EFPU2-NEXT:    addi 1, 1, 16
@@ -1638,14 +1638,14 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, ptr %a3, ptr %a4, ptr %a5
 ; SPE-LABEL: test_spill:
 ; SPE:       # %bb.0: # %entry
 ; SPE-NEXT:    mflr 0
-; SPE-NEXT:    stw 0, 4(1)
 ; SPE-NEXT:    stwu 1, -288(1)
 ; SPE-NEXT:    li 5, 256
-; SPE-NEXT:    evstddx 30, 1, 5  # 8-byte Folded Spill
-; SPE-NEXT:    li 5, .LCPI55_0@
+; SPE-NEXT:    stw 0, 292(1)
 ; SPE-NEXT:    lis 6, .LCPI55_0@ha
+; SPE-NEXT:    evstddx 30, 1, 5 # 8-byte Folded Spill
+; SPE-NEXT:    li 5, .LCPI55_0@l
 ; SPE-NEXT:    evlddx 5, 6, 5
-; SPE-NEXT:    stw 31, 284(1)    # 4-byte Folded Spill
+; SPE-NEXT:    stw 31, 284(1) # 4-byte Folded Spill
 ; SPE-NEXT:    evstdd 14, 128(1) # 8-byte Folded Spill
 ; SPE-NEXT:    evstdd 15, 136(1) # 8-byte Folded Spill
 ; SPE-NEXT:    evstdd 16, 144(1) # 8-byte Folded Spill
@@ -1706,7 +1706,7 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, ptr %a3, ptr %a4, ptr %a5
 ; SPE-NEXT:    evldd 16, 144(1) # 8-byte Folded Reload
 ; SPE-NEXT:    evldd 15, 136(1) # 8-byte Folded Reload
 ; SPE-NEXT:    evldd 14, 128(1) # 8-byte Folded Reload
-; SPE-NEXT:    lwz 31, 284(1)   # 4-byte Folded Reload
+; SPE-NEXT:    lwz 31, 284(1) # 4-byte Folded Reload
 ; SPE-NEXT:    lwz 0, 292(1)
 ; SPE-NEXT:    addi 1, 1, 288
 ; SPE-NEXT:    mtlr 0
@@ -1715,10 +1715,10 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, ptr %a3, ptr %a4, ptr %a5
 ; EFPU2-LABEL: test_spill:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -144(1)
 ; EFPU2-NEXT:    mr 5, 3
 ; EFPU2-NEXT:    mr 6, 4
+; EFPU2-NEXT:    stw 0, 148(1)
 ; EFPU2-NEXT:    evstdd 27, 104(1) # 8-byte Folded Spill
 ; EFPU2-NEXT:    evstdd 28, 112(1) # 8-byte Folded Spill
 ; EFPU2-NEXT:    evstdd 29, 120(1) # 8-byte Folded Spill
@@ -1778,9 +1778,9 @@ define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 {
 ; CHECK-LABEL: test_fma:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -32(1)
 ; CHECK-NEXT:    cmpwi 3, 0
+; CHECK-NEXT:    stw 0, 36(1)
 ; CHECK-NEXT:    evstdd 29, 8(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    evstdd 30, 16(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    ble 0, .LBB56_3
@@ -1839,8 +1839,8 @@ define void @d(ptr %e, ptr %f) #0 {
 ; SPE-LABEL: d:
 ; SPE:       # %bb.0: # %entry
 ; SPE-NEXT:    mflr 0
-; SPE-NEXT:    stw 0, 4(1)
 ; SPE-NEXT:    stwu 1, -48(1)
+; SPE-NEXT:    stw 0, 52(1)
 ; SPE-NEXT:    lwz 4, 0(4)
 ; SPE-NEXT:    lwz 3, 0(3)
 ; SPE-NEXT:    evstdd 29, 24(1) # 8-byte Folded Spill
@@ -1872,8 +1872,8 @@ define void @d(ptr %e, ptr %f) #0 {
 ; EFPU2-LABEL: d:
 ; EFPU2:       # %bb.0: # %entry
 ; EFPU2-NEXT:    mflr 0
-; EFPU2-NEXT:    stw 0, 4(1)
 ; EFPU2-NEXT:    stwu 1, -64(1)
+; EFPU2-NEXT:    stw 0, 68(1)
 ; EFPU2-NEXT:    lwz 3, 0(3)
 ; EFPU2-NEXT:    evstdd 26, 16(1) # 8-byte Folded Spill
 ; EFPU2-NEXT:    evstdd 27, 24(1) # 8-byte Folded Spill

diff  --git a/llvm/test/CodeGen/PowerPC/srem-lkk.ll b/llvm/test/CodeGen/PowerPC/srem-lkk.ll
index 33cc8f3fe3f31..0a93255781e09 100644
--- a/llvm/test/CodeGen/PowerPC/srem-lkk.ll
+++ b/llvm/test/CodeGen/PowerPC/srem-lkk.ll
@@ -133,8 +133,8 @@ define i64 @dont_fold_srem_i64(i64 %x) {
 ; CHECK-LABEL: dont_fold_srem_i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -16(1)
+; CHECK-NEXT:    stw 0, 20(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset lr, 4
 ; CHECK-NEXT:    li 5, 0

diff  --git a/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
index e9c78b7554ba6..bc959ea396832 100644
--- a/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll
@@ -121,11 +121,11 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
 ; PPC-LABEL: test_srem_vec:
 ; PPC:       # %bb.0:
 ; PPC-NEXT:    mflr 0
-; PPC-NEXT:    stw 0, 4(1)
 ; PPC-NEXT:    stwu 1, -48(1)
+; PPC-NEXT:    stw 0, 52(1)
+; PPC-NEXT:    clrlwi 5, 5, 31
 ; PPC-NEXT:    stw 29, 36(1) # 4-byte Folded Spill
 ; PPC-NEXT:    mr 29, 6
-; PPC-NEXT:    clrlwi 5, 5, 31
 ; PPC-NEXT:    clrlwi 6, 7, 31
 ; PPC-NEXT:    clrlwi 3, 3, 31
 ; PPC-NEXT:    stw 27, 28(1) # 4-byte Folded Spill

diff  --git a/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll b/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
index 30adfdf024bef..2e50940f92728 100644
--- a/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
@@ -14,11 +14,11 @@ define dso_local signext i32 @main(i32 signext %argc, ptr nocapture readnone %ar
 ; CHECK-NEXT:    mfocrf 12, 32
 ; CHECK-NEXT:    mflr 0
 ; CHECK-NEXT:    std 31, -8(1)
-; CHECK-NEXT:    std 0, 16(1)
 ; CHECK-NEXT:    stw 12, 8(1)
 ; CHECK-NEXT:    stdu 1, -784(1)
 ; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $x3
 ; CHECK-NEXT:    cmpwi 2, 3, 2
+; CHECK-NEXT:    std 0, 800(1)
 ; CHECK-NEXT:    mr 31, 1
 ; CHECK-NEXT:    li 3, 0
 ; CHECK-NEXT:    blt 2, .LBB0_3
@@ -59,16 +59,16 @@ define dso_local signext i32 @main(i32 signext %argc, ptr nocapture readnone %ar
 ;
 ; BE-LABEL: main:
 ; BE:       # %bb.0: # %entry
+; BE-NEXT:    mfcr 12
 ; BE-NEXT:    mflr 0
 ; BE-NEXT:    std 31, -8(1)
-; BE-NEXT:    std 0, 16(1)
-; BE-NEXT:    mfcr 12
 ; BE-NEXT:    stw 12, 8(1)
 ; BE-NEXT:    stdu 1, -800(1)
 ; BE-NEXT:    li 4, 0
 ; BE-NEXT:    # kill: def $r3 killed $r3 killed $x3
 ; BE-NEXT:    cmpwi 2, 3, 2
 ; BE-NEXT:    mr 3, 4
+; BE-NEXT:    std 0, 816(1)
 ; BE-NEXT:    mr 31, 1
 ; BE-NEXT:    blt 2, .LBB0_3
 ; BE-NEXT:  # %bb.1: # %if.end

diff  --git a/llvm/test/CodeGen/PowerPC/store_fptoi.ll b/llvm/test/CodeGen/PowerPC/store_fptoi.ll
index df7d318c3384c..32fc55fab0609 100644
--- a/llvm/test/CodeGen/PowerPC/store_fptoi.ll
+++ b/llvm/test/CodeGen/PowerPC/store_fptoi.ll
@@ -24,10 +24,10 @@ define void @qpConv2sdw(ptr nocapture readonly %a, ptr nocapture %b) {
 ; CHECK-PWR8-NEXT:    .cfi_offset lr, 16
 ; CHECK-PWR8-NEXT:    .cfi_offset r30, -16
 ; CHECK-PWR8-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    std 0, 16(1)
 ; CHECK-PWR8-NEXT:    stdu 1, -48(1)
-; CHECK-PWR8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-PWR8-NEXT:    std 0, 64(1)
 ; CHECK-PWR8-NEXT:    mr 30, 4
+; CHECK-PWR8-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-PWR8-NEXT:    xxswapd 2, 0
 ; CHECK-PWR8-NEXT:    bl __fixkfdi
 ; CHECK-PWR8-NEXT:    nop
@@ -62,10 +62,10 @@ define void @qpConv2sw(ptr nocapture readonly %a, ptr nocapture %b) {
 ; CHECK-PWR8-NEXT:    .cfi_offset lr, 16
 ; CHECK-PWR8-NEXT:    .cfi_offset r30, -16
 ; CHECK-PWR8-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    std 0, 16(1)
 ; CHECK-PWR8-NEXT:    stdu 1, -48(1)
-; CHECK-PWR8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-PWR8-NEXT:    std 0, 64(1)
 ; CHECK-PWR8-NEXT:    mr 30, 4
+; CHECK-PWR8-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-PWR8-NEXT:    xxswapd 2, 0
 ; CHECK-PWR8-NEXT:    bl __fixkfsi
 ; CHECK-PWR8-NEXT:    nop
@@ -100,10 +100,10 @@ define void @qpConv2udw(ptr nocapture readonly %a, ptr nocapture %b) {
 ; CHECK-PWR8-NEXT:    .cfi_offset lr, 16
 ; CHECK-PWR8-NEXT:    .cfi_offset r30, -16
 ; CHECK-PWR8-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    std 0, 16(1)
 ; CHECK-PWR8-NEXT:    stdu 1, -48(1)
-; CHECK-PWR8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-PWR8-NEXT:    std 0, 64(1)
 ; CHECK-PWR8-NEXT:    mr 30, 4
+; CHECK-PWR8-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-PWR8-NEXT:    xxswapd 2, 0
 ; CHECK-PWR8-NEXT:    bl __fixunskfdi
 ; CHECK-PWR8-NEXT:    nop
@@ -138,10 +138,10 @@ define void @qpConv2uw(ptr nocapture readonly %a, ptr nocapture %b) {
 ; CHECK-PWR8-NEXT:    .cfi_offset lr, 16
 ; CHECK-PWR8-NEXT:    .cfi_offset r30, -16
 ; CHECK-PWR8-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    std 0, 16(1)
 ; CHECK-PWR8-NEXT:    stdu 1, -48(1)
-; CHECK-PWR8-NEXT:    lxvd2x 0, 0, 3
+; CHECK-PWR8-NEXT:    std 0, 64(1)
 ; CHECK-PWR8-NEXT:    mr 30, 4
+; CHECK-PWR8-NEXT:    lxvd2x 0, 0, 3
 ; CHECK-PWR8-NEXT:    xxswapd 2, 0
 ; CHECK-PWR8-NEXT:    bl __fixunskfsi
 ; CHECK-PWR8-NEXT:    nop

diff  --git a/llvm/test/CodeGen/PowerPC/tailcall-speculatable-callee.ll b/llvm/test/CodeGen/PowerPC/tailcall-speculatable-callee.ll
index 423dedc2b2c8b..3c0082c8769c2 100644
--- a/llvm/test/CodeGen/PowerPC/tailcall-speculatable-callee.ll
+++ b/llvm/test/CodeGen/PowerPC/tailcall-speculatable-callee.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
 
@@ -6,8 +7,9 @@
 ; Callee should be tail called in this function since it is at a tail call position.
 define dso_local double @speculatable_callee_return_use_only (ptr nocapture %res, double %a) #0 {
 ; CHECK-LABEL: speculatable_callee_return_use_only:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: b callee
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    b callee
+; CHECK-NEXT:    #TC_RETURNd8 callee 0
 entry:
   %value = tail call double @callee(double %a) #2
   ret double %value
@@ -16,19 +18,19 @@ entry:
 ; Callee should not be tail called since it is not at a tail call position.
 define dso_local void @speculatable_callee_non_return_use_only (ptr nocapture %res, double %a) #0 {
 ; CHECK-LABEL: speculatable_callee_non_return_use_only:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mflr r0
-; CHECK-NEXT: std r30, -16(r1)  # 8-byte Folded Spill
-; CHECK-NEXT: std r0, 16(r1)
-; CHECK-NEXT: stdu r1, -48(r1)
-; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: bl callee
-; CHECK-NEXT: stfd f1, 0(r30)
-; CHECK-NEXT: addi r1, r1, 48
-; CHECK-NEXT: ld r0, 16(r1)
-; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-NEXT: mtlr r0
-; CHECK-NEXT: blr
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    std r0, 64(r1)
+; CHECK-NEXT:    mr r30, r3
+; CHECK-NEXT:    bl callee
+; CHECK-NEXT:    stfd f1, 0(r30)
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
 entry:
   %call = tail call double @callee(double %a) #2
   store double %call, ptr %res, align 8
@@ -37,20 +39,20 @@ entry:
 
 ; Callee should not be tail called since it is not at a tail call position.
 define dso_local double @speculatable_callee_multi_use (ptr nocapture %res, double %a) #0 {
-  ; CHECK-LABEL: speculatable_callee_multi_use:
-  ; CHECK: # %bb.0: # %entry
-  ; CHECK-NEXT: mflr r0
-  ; CHECK-NEXT: std r30, -16(r1)  # 8-byte Folded Spill
-  ; CHECK-NEXT: std r0, 16(r1)
-  ; CHECK-NEXT: stdu r1, -48(r1)
-  ; CHECK-NEXT: mr r30, r3
-  ; CHECK-NEXT: bl callee
-  ; CHECK-NEXT: stfd f1, 0(r30)
-  ; CHECK-NEXT: addi r1, r1, 48
-  ; CHECK-NEXT: ld r0, 16(r1)
-  ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-  ; CHECK-NEXT: mtlr r0
-  ; CHECK-NEXT: blr
+; CHECK-LABEL: speculatable_callee_multi_use:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    std r0, 64(r1)
+; CHECK-NEXT:    mr r30, r3
+; CHECK-NEXT:    bl callee
+; CHECK-NEXT:    stfd f1, 0(r30)
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
   entry:
   %call = tail call double @callee(double %a) #2
   store double %call, ptr %res, align 8
@@ -60,25 +62,25 @@ define dso_local double @speculatable_callee_multi_use (ptr nocapture %res, doub
 ; Callee should not be tail called since it is not at a tail call position.
 ; FIXME: A speculatable callee can be tail called if it is moved into a valid tail call position.
 define dso_local double @speculatable_callee_intermediate_instructions (ptr nocapture %res, double %a) #0 {
-  ; CHECK-LABEL: speculatable_callee_intermediate_instructions:
-  ; CHECK: # %bb.0: # %entry
-  ; CHECK-NEXT: mflr r0
-  ; CHECK-NEXT: std r30, -16(r1)  # 8-byte Folded Spill
-  ; CHECK-NEXT: std r0, 16(r1)
-  ; CHECK-NEXT: stdu r1, -48(r1)
-  ; CHECK-NEXT: mr r30, r3
-  ; CHECK-NEXT: bl callee
-  ; CHECK-NEXT: lis r3, 4101
-  ; CHECK-NEXT: ori r3, r3, 13107
-  ; CHECK-NEXT: rldic r3, r3, 34, 1
-  ; CHECK-NEXT: oris r3, r3, 52428
-  ; CHECK-NEXT: ori r3, r3, 52429
-  ; CHECK-NEXT: std r3, 0(r30)
-  ; CHECK-NEXT: addi r1, r1, 48
-  ; CHECK-NEXT: ld r0, 16(r1)
-  ; CHECK-NEXT: ld r30, -16(r1)  # 8-byte Folded Reload
-  ; CHECK-NEXT: mtlr r0
-  ; CHECK-NEXT: blr
+; CHECK-LABEL: speculatable_callee_intermediate_instructions:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    std r0, 64(r1)
+; CHECK-NEXT:    mr r30, r3
+; CHECK-NEXT:    bl callee
+; CHECK-NEXT:    lis r3, 4101
+; CHECK-NEXT:    ori r3, r3, 13107
+; CHECK-NEXT:    rldic r3, r3, 34, 1
+; CHECK-NEXT:    oris r3, r3, 52428
+; CHECK-NEXT:    ori r3, r3, 52429
+; CHECK-NEXT:    std r3, 0(r30)
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
 
   entry:
   %call = tail call double @callee(double %a) #2
@@ -88,6 +90,11 @@ define dso_local double @speculatable_callee_intermediate_instructions (ptr noca
 
 
 define dso_local double @callee(double) #1 {
+; CHECK-LABEL: callee:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NEXT:    blr
   ret double 4.5
 }
 

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
index 2f6bd65f44c34..99ad7c8c93bb2 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll
@@ -30,15 +30,15 @@ define dso_local i32 @testCompare1(ptr nocapture readonly %arg1) nounwind {
 ; BE-LABEL: testCompare1:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    std r0, 16(r1)
 ; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    addis r4, r2, testCompare1@toc@ha
-; BE-NEXT:    lbz r3, 0(r3)
 ; BE-NEXT:    lbz r4, testCompare1@toc@l(r4)
-; BE-NEXT:    clrlwi r3, r3, 31
+; BE-NEXT:    lbz r3, 0(r3)
 ; BE-NEXT:    clrlwi r4, r4, 31
-; BE-NEXT:    clrldi r3, r3, 32
+; BE-NEXT:    clrlwi r3, r3, 31
 ; BE-NEXT:    clrldi r4, r4, 32
+; BE-NEXT:    clrldi r3, r3, 32
 ; BE-NEXT:    sub r3, r3, r4
 ; BE-NEXT:    rldicl r3, r3, 1, 63
 ; BE-NEXT:    bl fn2
@@ -51,15 +51,15 @@ define dso_local i32 @testCompare1(ptr nocapture readonly %arg1) nounwind {
 ; LE-LABEL: testCompare1:
 ; LE:       # %bb.0: # %entry
 ; LE-NEXT:    mflr r0
-; LE-NEXT:    std r0, 16(r1)
 ; LE-NEXT:    stdu r1, -32(r1)
+; LE-NEXT:    std r0, 48(r1)
 ; LE-NEXT:    addis r4, r2, testCompare1@toc@ha
-; LE-NEXT:    lbz r3, 0(r3)
 ; LE-NEXT:    lbz r4, testCompare1@toc@l(r4)
-; LE-NEXT:    clrlwi r3, r3, 31
+; LE-NEXT:    lbz r3, 0(r3)
 ; LE-NEXT:    clrlwi r4, r4, 31
-; LE-NEXT:    clrldi r3, r3, 32
+; LE-NEXT:    clrlwi r3, r3, 31
 ; LE-NEXT:    clrldi r4, r4, 32
+; LE-NEXT:    clrldi r3, r3, 32
 ; LE-NEXT:    sub r3, r3, r4
 ; LE-NEXT:    rldicl r3, r3, 1, 63
 ; LE-NEXT:    bl fn2

diff  --git a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
index 641ea0e3dcecb..030dfde8f3b71 100644
--- a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
+++ b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
@@ -32,15 +32,15 @@ define dso_local i32 @testCompare1(ptr nocapture readonly %arg1) nounwind {
 ; BE-LABEL: testCompare1:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    mflr r0
-; BE-NEXT:    std r0, 16(r1)
 ; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
 ; BE-NEXT:    addis r4, r2, testCompare1@toc@ha
-; BE-NEXT:    lbz r3, 0(r3)
 ; BE-NEXT:    lbz r4, testCompare1@toc@l(r4)
-; BE-NEXT:    clrlwi r3, r3, 31
+; BE-NEXT:    lbz r3, 0(r3)
 ; BE-NEXT:    clrlwi r4, r4, 31
-; BE-NEXT:    clrldi r3, r3, 32
+; BE-NEXT:    clrlwi r3, r3, 31
 ; BE-NEXT:    clrldi r4, r4, 32
+; BE-NEXT:    clrldi r3, r3, 32
 ; BE-NEXT:    sub r3, r4, r3
 ; BE-NEXT:    rldicl r3, r3, 1, 63
 ; BE-NEXT:    bl fn2
@@ -53,15 +53,15 @@ define dso_local i32 @testCompare1(ptr nocapture readonly %arg1) nounwind {
 ; LE-LABEL: testCompare1:
 ; LE:       # %bb.0: # %entry
 ; LE-NEXT:    mflr r0
-; LE-NEXT:    std r0, 16(r1)
 ; LE-NEXT:    stdu r1, -32(r1)
+; LE-NEXT:    std r0, 48(r1)
 ; LE-NEXT:    addis r4, r2, testCompare1@toc@ha
-; LE-NEXT:    lbz r3, 0(r3)
 ; LE-NEXT:    lbz r4, testCompare1@toc@l(r4)
-; LE-NEXT:    clrlwi r3, r3, 31
+; LE-NEXT:    lbz r3, 0(r3)
 ; LE-NEXT:    clrlwi r4, r4, 31
-; LE-NEXT:    clrldi r3, r3, 32
+; LE-NEXT:    clrlwi r3, r3, 31
 ; LE-NEXT:    clrldi r4, r4, 32
+; LE-NEXT:    clrldi r3, r3, 32
 ; LE-NEXT:    sub r3, r4, r3
 ; LE-NEXT:    rldicl r3, r3, 1, 63
 ; LE-NEXT:    bl fn2

diff  --git a/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll b/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll
index 13ba5ba63c163..fd960c15b305c 100644
--- a/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll
+++ b/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll
@@ -13,12 +13,12 @@ define dso_local void @test(ptr nocapture %fp, i32 signext %Arg, i32 signext %Le
 ; CHECK-NEXT:    std r28, -32(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -64(r1)
 ; CHECK-NEXT:    mr r29, r5
 ; CHECK-NEXT:    mr r30, r4
 ; CHECK-NEXT:    mr r28, r3
 ; CHECK-NEXT:    std r2, 24(r1)
+; CHECK-NEXT:    std r0, 80(r1)
 ; CHECK-NEXT:    cmpwi r29, 1
 ; CHECK-NEXT:    bc 12, lt, .LBB0_3
 ; CHECK-NEXT:  # %bb.1: # %entry

diff  --git a/llvm/test/CodeGen/PowerPC/urem-lkk.ll b/llvm/test/CodeGen/PowerPC/urem-lkk.ll
index 3ed54c69ccd51..43a1e5a2faf6d 100644
--- a/llvm/test/CodeGen/PowerPC/urem-lkk.ll
+++ b/llvm/test/CodeGen/PowerPC/urem-lkk.ll
@@ -90,8 +90,8 @@ define i64 @dont_fold_urem_i64(i64 %x) {
 ; CHECK-LABEL: dont_fold_urem_i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
 ; CHECK-NEXT:    stwu 1, -16(1)
+; CHECK-NEXT:    stw 0, 20(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset lr, 4
 ; CHECK-NEXT:    li 5, 0

diff  --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 90880c5773357..f2ea2f3a3b4ea 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -168,8 +168,8 @@ define <1 x float> @constrained_vector_frem_v1f32(<1 x float> %x, <1 x float> %y
 ; PC64LE-LABEL: constrained_vector_frem_v1f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl fmodf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -180,8 +180,8 @@ define <1 x float> @constrained_vector_frem_v1f32(<1 x float> %x, <1 x float> %y
 ; PC64LE9-LABEL: constrained_vector_frem_v1f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl fmodf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -201,9 +201,9 @@ define <2 x double> @constrained_vector_frem_v2f64(<2 x double> %x, <2 x double>
 ; PC64LE-LABEL: constrained_vector_frem_v2f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stxvd2x 61, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
@@ -238,8 +238,8 @@ define <2 x double> @constrained_vector_frem_v2f64(<2 x double> %x, <2 x double>
 ; PC64LE9-LABEL: constrained_vector_frem_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    stxv 62, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 64(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 3
@@ -278,20 +278,20 @@ define <3 x float> @constrained_vector_frem_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE-LABEL: constrained_vector_frem_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE-NEXT:    xxsldwi 2, 35, 35, 1
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    xscvspdpn 1, 0
+; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
-; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    bl fmodf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 62
@@ -331,18 +331,18 @@ define <3 x float> @constrained_vector_frem_v3f32(<3 x float> %x, <3 x float> %y
 ; PC64LE9-LABEL: constrained_vector_frem_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    xxsldwi 0, 35, 35, 1
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    xscvspdpn 2, 0
 ; PC64LE9-NEXT:    vmr 31, 3
 ; PC64LE9-NEXT:    vmr 30, 2
+; PC64LE9-NEXT:    xscvspdpn 2, 0
 ; PC64LE9-NEXT:    bl fmodf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xxswapd 0, 62
@@ -388,18 +388,18 @@ define <3 x double> @constrained_vector_frem_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-LABEL: constrained_vector_frem_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stfd 28, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    fmr 28, 2
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    fmr 2, 4
 ; PC64LE-NEXT:    stfd 29, 72(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    fmr 30, 5
 ; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    fmr 31, 6
-; PC64LE-NEXT:    fmr 30, 5
 ; PC64LE-NEXT:    fmr 29, 3
 ; PC64LE-NEXT:    bl fmod
 ; PC64LE-NEXT:    nop
@@ -432,11 +432,11 @@ define <3 x double> @constrained_vector_frem_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9-LABEL: constrained_vector_frem_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    stfd 28, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    fmr 28, 2
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    fmr 28, 2
 ; PC64LE9-NEXT:    fmr 2, 4
 ; PC64LE9-NEXT:    stfd 29, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
@@ -483,9 +483,9 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE-LABEL: constrained_vector_frem_v4f64:
 ; PC64LE:       # %bb.0:
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -128(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 144(1)
 ; PC64LE-NEXT:    stxvd2x 59, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 60, 1, 3 # 16-byte Folded Spill
@@ -544,8 +544,8 @@ define <4 x double> @constrained_vector_frem_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE9-LABEL: constrained_vector_frem_v4f64:
 ; PC64LE9:       # %bb.0:
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -112(1)
+; PC64LE9-NEXT:    std 0, 128(1)
 ; PC64LE9-NEXT:    stxv 60, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 62, 80(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 30, 4
@@ -1228,8 +1228,8 @@ define <1 x float> @constrained_vector_pow_v1f32(<1 x float> %x, <1 x float> %y)
 ; PC64LE-LABEL: constrained_vector_pow_v1f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl powf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -1240,8 +1240,8 @@ define <1 x float> @constrained_vector_pow_v1f32(<1 x float> %x, <1 x float> %y)
 ; PC64LE9-LABEL: constrained_vector_pow_v1f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl powf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -1261,9 +1261,9 @@ define <2 x double> @constrained_vector_pow_v2f64(<2 x double> %x, <2 x double>
 ; PC64LE-LABEL: constrained_vector_pow_v2f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stxvd2x 61, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
@@ -1298,8 +1298,8 @@ define <2 x double> @constrained_vector_pow_v2f64(<2 x double> %x, <2 x double>
 ; PC64LE9-LABEL: constrained_vector_pow_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    stxv 62, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 64(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 3
@@ -1338,20 +1338,20 @@ define <3 x float> @constrained_vector_pow_v3f32(<3 x float> %x, <3 x float> %y)
 ; PC64LE-LABEL: constrained_vector_pow_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE-NEXT:    xxsldwi 2, 35, 35, 1
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    xscvspdpn 1, 0
+; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
-; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    bl powf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 62
@@ -1391,18 +1391,18 @@ define <3 x float> @constrained_vector_pow_v3f32(<3 x float> %x, <3 x float> %y)
 ; PC64LE9-LABEL: constrained_vector_pow_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    xxsldwi 0, 35, 35, 1
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    xscvspdpn 2, 0
 ; PC64LE9-NEXT:    vmr 31, 3
 ; PC64LE9-NEXT:    vmr 30, 2
+; PC64LE9-NEXT:    xscvspdpn 2, 0
 ; PC64LE9-NEXT:    bl powf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xxswapd 0, 62
@@ -1448,18 +1448,18 @@ define <3 x double> @constrained_vector_pow_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-LABEL: constrained_vector_pow_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stfd 28, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    fmr 28, 2
 ; PC64LE-NEXT:    li 3, 48
 ; PC64LE-NEXT:    fmr 2, 4
 ; PC64LE-NEXT:    stfd 29, 72(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    fmr 30, 5
 ; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    fmr 31, 6
-; PC64LE-NEXT:    fmr 30, 5
 ; PC64LE-NEXT:    fmr 29, 3
 ; PC64LE-NEXT:    bl pow
 ; PC64LE-NEXT:    nop
@@ -1492,11 +1492,11 @@ define <3 x double> @constrained_vector_pow_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9-LABEL: constrained_vector_pow_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    stfd 28, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    fmr 28, 2
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    fmr 28, 2
 ; PC64LE9-NEXT:    fmr 2, 4
 ; PC64LE9-NEXT:    stfd 29, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
@@ -1543,9 +1543,9 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE-LABEL: constrained_vector_pow_v4f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -128(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 144(1)
 ; PC64LE-NEXT:    stxvd2x 59, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 60, 1, 3 # 16-byte Folded Spill
@@ -1604,8 +1604,8 @@ define <4 x double> @constrained_vector_pow_v4f64(<4 x double> %x, <4 x double>
 ; PC64LE9-LABEL: constrained_vector_pow_v4f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -112(1)
+; PC64LE9-NEXT:    std 0, 128(1)
 ; PC64LE9-NEXT:    stxv 60, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 62, 80(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 30, 4
@@ -1664,9 +1664,9 @@ define <1 x float> @constrained_vector_powi_v1f32(<1 x float> %x, i32 %y) #0 {
 ; PC64LE-LABEL: constrained_vector_powi_v1f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
 ; PC64LE-NEXT:    clrldi 4, 4, 32
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl __powisf2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -1677,9 +1677,9 @@ define <1 x float> @constrained_vector_powi_v1f32(<1 x float> %x, i32 %y) #0 {
 ; PC64LE9-LABEL: constrained_vector_powi_v1f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
 ; PC64LE9-NEXT:    clrldi 4, 4, 32
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl __powisf2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -1699,14 +1699,14 @@ define <2 x double> @constrained_vector_powi_v2f64(<2 x double> %x, i32 %y) #0 {
 ; PC64LE-LABEL: constrained_vector_powi_v2f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    std 30, 80(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    clrldi 30, 5, 32
+; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 2
 ; PC64LE-NEXT:    xxlor 1, 63, 63
@@ -1733,15 +1733,15 @@ define <2 x double> @constrained_vector_powi_v2f64(<2 x double> %x, i32 %y) #0 {
 ; PC64LE9-LABEL: constrained_vector_powi_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    std 30, 64(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
 ; PC64LE9-NEXT:    clrldi 30, 5, 32
+; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
 ; PC64LE9-NEXT:    mr 4, 30
-; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    bl __powidf2
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    mr 4, 30
@@ -1774,18 +1774,18 @@ define <3 x float> @constrained_vector_powi_v3f32(<3 x float> %x, i32 %y) #0 {
 ; PC64LE-LABEL: constrained_vector_powi_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 1
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    std 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    clrldi 30, 5, 32
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    mr 4, 30
+; PC64LE-NEXT:    xscvspdpn 1, 0
+; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 2
-; PC64LE-NEXT:    xscvspdpn 1, 0
 ; PC64LE-NEXT:    bl __powisf2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 63
@@ -1822,16 +1822,16 @@ define <3 x float> @constrained_vector_powi_v3f32(<3 x float> %x, i32 %y) #0 {
 ; PC64LE9-LABEL: constrained_vector_powi_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    std 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    clrldi 30, 5, 32
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    clrldi 30, 5, 32
 ; PC64LE9-NEXT:    vmr 31, 2
-; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    mr 4, 30
+; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl __powisf2
 ; PC64LE9-NEXT:    nop
@@ -1876,17 +1876,17 @@ define <3 x double> @constrained_vector_powi_v3f64(<3 x double> %x, i32 %y) #0 {
 ; PC64LE-LABEL: constrained_vector_powi_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    std 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    clrldi 30, 6, 32
 ; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    fmr 30, 2
 ; PC64LE-NEXT:    mr 4, 30
+; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    fmr 31, 3
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    fmr 31, 3
+; PC64LE-NEXT:    fmr 30, 2
 ; PC64LE-NEXT:    bl __powidf2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 63, 1, 1
@@ -1917,11 +1917,11 @@ define <3 x double> @constrained_vector_powi_v3f64(<3 x double> %x, i32 %y) #0 {
 ; PC64LE9-LABEL: constrained_vector_powi_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    std 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    clrldi 30, 6, 32
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    clrldi 30, 6, 32
 ; PC64LE9-NEXT:    mr 4, 30
 ; PC64LE9-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
@@ -1965,14 +1965,14 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
 ; PC64LE-LABEL: constrained_vector_powi_v4f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -112(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 128(1)
 ; PC64LE-NEXT:    std 30, 96(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    clrldi 30, 7, 32
+; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    stxvd2x 61, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    mr 4, 30
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    li 3, 80
@@ -2017,15 +2017,15 @@ define <4 x double> @constrained_vector_powi_v4f64(<4 x double> %x, i32 %y) #0 {
 ; PC64LE9-LABEL: constrained_vector_powi_v4f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -96(1)
+; PC64LE9-NEXT:    std 0, 112(1)
 ; PC64LE9-NEXT:    std 30, 80(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 62, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 30, 2
 ; PC64LE9-NEXT:    clrldi 30, 7, 32
+; PC64LE9-NEXT:    stxv 61, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    xscpsgndp 1, 62, 62
 ; PC64LE9-NEXT:    mr 4, 30
-; PC64LE9-NEXT:    stxv 61, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 64(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 3
 ; PC64LE9-NEXT:    bl __powidf2
@@ -2072,8 +2072,8 @@ define <1 x float> @constrained_vector_sin_v1f32(<1 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sin_v1f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl sinf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -2084,8 +2084,8 @@ define <1 x float> @constrained_vector_sin_v1f32(<1 x float> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_sin_v1f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl sinf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -2104,9 +2104,9 @@ define <2 x double> @constrained_vector_sin_v2f64(<2 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sin_v2f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
@@ -2133,8 +2133,8 @@ define <2 x double> @constrained_vector_sin_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_sin_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
@@ -2166,10 +2166,10 @@ define <3 x float> @constrained_vector_sin_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sin_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
@@ -2208,14 +2208,14 @@ define <3 x float> @constrained_vector_sin_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_sin_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stfd 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    xscvspdpn 1, 0
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
+; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl sinf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xxswapd 0, 63
@@ -2255,14 +2255,14 @@ define <3 x double> @constrained_vector_sin_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sin_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    fmr 30, 2
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    fmr 31, 3
-; PC64LE-NEXT:    fmr 30, 2
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    bl sin
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 63, 1, 1
@@ -2290,11 +2290,11 @@ define <3 x double> @constrained_vector_sin_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_sin_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stfd 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    fmr 31, 3
 ; PC64LE9-NEXT:    fmr 30, 2
 ; PC64LE9-NEXT:    bl sin
@@ -2331,9 +2331,9 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_sin_v4f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stxvd2x 61, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
@@ -2376,8 +2376,8 @@ define <4 x double> @constrained_vector_sin_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_sin_v4f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    stxv 62, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 30, 2
 ; PC64LE9-NEXT:    xscpsgndp 1, 62, 62
@@ -2423,8 +2423,8 @@ define <1 x float> @constrained_vector_cos_v1f32(<1 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_cos_v1f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl cosf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -2435,8 +2435,8 @@ define <1 x float> @constrained_vector_cos_v1f32(<1 x float> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_cos_v1f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl cosf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -2455,9 +2455,9 @@ define <2 x double> @constrained_vector_cos_v2f64(<2 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_cos_v2f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
@@ -2484,8 +2484,8 @@ define <2 x double> @constrained_vector_cos_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_cos_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
@@ -2517,10 +2517,10 @@ define <3 x float> @constrained_vector_cos_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_cos_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
@@ -2559,14 +2559,14 @@ define <3 x float> @constrained_vector_cos_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_cos_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stfd 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    xscvspdpn 1, 0
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
+; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl cosf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xxswapd 0, 63
@@ -2606,14 +2606,14 @@ define <3 x double> @constrained_vector_cos_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_cos_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    fmr 30, 2
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    fmr 31, 3
-; PC64LE-NEXT:    fmr 30, 2
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    bl cos
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 63, 1, 1
@@ -2641,11 +2641,11 @@ define <3 x double> @constrained_vector_cos_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_cos_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stfd 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    fmr 31, 3
 ; PC64LE9-NEXT:    fmr 30, 2
 ; PC64LE9-NEXT:    bl cos
@@ -2682,9 +2682,9 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_cos_v4f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stxvd2x 61, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
@@ -2727,8 +2727,8 @@ define <4 x double> @constrained_vector_cos_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_cos_v4f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    stxv 62, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 30, 2
 ; PC64LE9-NEXT:    xscpsgndp 1, 62, 62
@@ -2774,8 +2774,8 @@ define <1 x float> @constrained_vector_exp_v1f32(<1 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_exp_v1f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl expf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -2786,8 +2786,8 @@ define <1 x float> @constrained_vector_exp_v1f32(<1 x float> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_exp_v1f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl expf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -2806,9 +2806,9 @@ define <2 x double> @constrained_vector_exp_v2f64(<2 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_exp_v2f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
@@ -2835,8 +2835,8 @@ define <2 x double> @constrained_vector_exp_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_exp_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
@@ -2868,10 +2868,10 @@ define <3 x float> @constrained_vector_exp_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_exp_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
@@ -2910,14 +2910,14 @@ define <3 x float> @constrained_vector_exp_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_exp_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stfd 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    xscvspdpn 1, 0
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
+; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl expf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xxswapd 0, 63
@@ -2957,14 +2957,14 @@ define <3 x double> @constrained_vector_exp_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_exp_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    fmr 30, 2
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    fmr 31, 3
-; PC64LE-NEXT:    fmr 30, 2
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    bl exp
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 63, 1, 1
@@ -2992,11 +2992,11 @@ define <3 x double> @constrained_vector_exp_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_exp_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stfd 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    fmr 31, 3
 ; PC64LE9-NEXT:    fmr 30, 2
 ; PC64LE9-NEXT:    bl exp
@@ -3033,9 +3033,9 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_exp_v4f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stxvd2x 61, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
@@ -3078,8 +3078,8 @@ define <4 x double> @constrained_vector_exp_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_exp_v4f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    stxv 62, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 30, 2
 ; PC64LE9-NEXT:    xscpsgndp 1, 62, 62
@@ -3125,8 +3125,8 @@ define <1 x float> @constrained_vector_exp2_v1f32(<1 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_exp2_v1f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl exp2f
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -3137,8 +3137,8 @@ define <1 x float> @constrained_vector_exp2_v1f32(<1 x float> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_exp2_v1f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl exp2f
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -3157,9 +3157,9 @@ define <2 x double> @constrained_vector_exp2_v2f64(<2 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_exp2_v2f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
@@ -3186,8 +3186,8 @@ define <2 x double> @constrained_vector_exp2_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_exp2_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
@@ -3219,10 +3219,10 @@ define <3 x float> @constrained_vector_exp2_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_exp2_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
@@ -3261,14 +3261,14 @@ define <3 x float> @constrained_vector_exp2_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_exp2_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stfd 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    xscvspdpn 1, 0
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
+; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl exp2f
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xxswapd 0, 63
@@ -3308,14 +3308,14 @@ define <3 x double> @constrained_vector_exp2_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_exp2_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    fmr 30, 2
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    fmr 31, 3
-; PC64LE-NEXT:    fmr 30, 2
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    bl exp2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 63, 1, 1
@@ -3343,11 +3343,11 @@ define <3 x double> @constrained_vector_exp2_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_exp2_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stfd 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    fmr 31, 3
 ; PC64LE9-NEXT:    fmr 30, 2
 ; PC64LE9-NEXT:    bl exp2
@@ -3384,9 +3384,9 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_exp2_v4f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stxvd2x 61, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
@@ -3429,8 +3429,8 @@ define <4 x double> @constrained_vector_exp2_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_exp2_v4f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    stxv 62, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 30, 2
 ; PC64LE9-NEXT:    xscpsgndp 1, 62, 62
@@ -3476,8 +3476,8 @@ define <1 x float> @constrained_vector_log_v1f32(<1 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_log_v1f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl logf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -3488,8 +3488,8 @@ define <1 x float> @constrained_vector_log_v1f32(<1 x float> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_log_v1f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl logf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -3508,9 +3508,9 @@ define <2 x double> @constrained_vector_log_v2f64(<2 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_log_v2f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
@@ -3537,8 +3537,8 @@ define <2 x double> @constrained_vector_log_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_log_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
@@ -3570,10 +3570,10 @@ define <3 x float> @constrained_vector_log_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_log_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
@@ -3612,14 +3612,14 @@ define <3 x float> @constrained_vector_log_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_log_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stfd 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    xscvspdpn 1, 0
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
+; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl logf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xxswapd 0, 63
@@ -3659,14 +3659,14 @@ define <3 x double> @constrained_vector_log_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_log_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    fmr 30, 2
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    fmr 31, 3
-; PC64LE-NEXT:    fmr 30, 2
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    bl log
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 63, 1, 1
@@ -3694,11 +3694,11 @@ define <3 x double> @constrained_vector_log_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_log_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stfd 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    fmr 31, 3
 ; PC64LE9-NEXT:    fmr 30, 2
 ; PC64LE9-NEXT:    bl log
@@ -3735,9 +3735,9 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_log_v4f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stxvd2x 61, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
@@ -3780,8 +3780,8 @@ define <4 x double> @constrained_vector_log_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_log_v4f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    stxv 62, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 30, 2
 ; PC64LE9-NEXT:    xscpsgndp 1, 62, 62
@@ -3827,8 +3827,8 @@ define <1 x float> @constrained_vector_log10_v1f32(<1 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_log10_v1f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl log10f
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -3839,8 +3839,8 @@ define <1 x float> @constrained_vector_log10_v1f32(<1 x float> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_log10_v1f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl log10f
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -3859,9 +3859,9 @@ define <2 x double> @constrained_vector_log10_v2f64(<2 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_log10_v2f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
@@ -3888,8 +3888,8 @@ define <2 x double> @constrained_vector_log10_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_log10_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
@@ -3921,10 +3921,10 @@ define <3 x float> @constrained_vector_log10_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_log10_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
@@ -3963,14 +3963,14 @@ define <3 x float> @constrained_vector_log10_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_log10_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stfd 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    xscvspdpn 1, 0
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
+; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl log10f
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xxswapd 0, 63
@@ -4010,14 +4010,14 @@ define <3 x double> @constrained_vector_log10_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_log10_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    fmr 30, 2
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    fmr 31, 3
-; PC64LE-NEXT:    fmr 30, 2
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    bl log10
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 63, 1, 1
@@ -4045,11 +4045,11 @@ define <3 x double> @constrained_vector_log10_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_log10_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stfd 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    fmr 31, 3
 ; PC64LE9-NEXT:    fmr 30, 2
 ; PC64LE9-NEXT:    bl log10
@@ -4086,9 +4086,9 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_log10_v4f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stxvd2x 61, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
@@ -4131,8 +4131,8 @@ define <4 x double> @constrained_vector_log10_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_log10_v4f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    stxv 62, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 30, 2
 ; PC64LE9-NEXT:    xscpsgndp 1, 62, 62
@@ -4178,8 +4178,8 @@ define <1 x float> @constrained_vector_log2_v1f32(<1 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_log2_v1f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl log2f
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -4190,8 +4190,8 @@ define <1 x float> @constrained_vector_log2_v1f32(<1 x float> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_log2_v1f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl log2f
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -4210,9 +4210,9 @@ define <2 x double> @constrained_vector_log2_v2f64(<2 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_log2_v2f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
@@ -4239,8 +4239,8 @@ define <2 x double> @constrained_vector_log2_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_log2_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
@@ -4272,10 +4272,10 @@ define <3 x float> @constrained_vector_log2_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_log2_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
@@ -4314,14 +4314,14 @@ define <3 x float> @constrained_vector_log2_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_log2_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stfd 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    xscvspdpn 1, 0
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
+; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl log2f
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xxswapd 0, 63
@@ -4361,14 +4361,14 @@ define <3 x double> @constrained_vector_log2_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_log2_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    fmr 30, 2
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    fmr 31, 3
-; PC64LE-NEXT:    fmr 30, 2
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    bl log2
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 63, 1, 1
@@ -4396,11 +4396,11 @@ define <3 x double> @constrained_vector_log2_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_log2_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stfd 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    fmr 31, 3
 ; PC64LE9-NEXT:    fmr 30, 2
 ; PC64LE9-NEXT:    bl log2
@@ -4437,9 +4437,9 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_log2_v4f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stxvd2x 61, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
@@ -4482,8 +4482,8 @@ define <4 x double> @constrained_vector_log2_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_log2_v4f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    stxv 62, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 30, 2
 ; PC64LE9-NEXT:    xscpsgndp 1, 62, 62
@@ -4668,8 +4668,8 @@ define <1 x float> @constrained_vector_nearbyint_v1f32(<1 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_nearbyint_v1f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl nearbyintf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -4680,8 +4680,8 @@ define <1 x float> @constrained_vector_nearbyint_v1f32(<1 x float> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_nearbyint_v1f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl nearbyintf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -4700,9 +4700,9 @@ define <2 x double> @constrained_vector_nearbyint_v2f64(<2 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_nearbyint_v2f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
@@ -4729,8 +4729,8 @@ define <2 x double> @constrained_vector_nearbyint_v2f64(<2 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_nearbyint_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
 ; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
@@ -4762,10 +4762,10 @@ define <3 x float> @constrained_vector_nearbyint_v3f32(<3 x float> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_nearbyint_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
@@ -4804,14 +4804,14 @@ define <3 x float> @constrained_vector_nearbyint_v3f32(<3 x float> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_nearbyint_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stfd 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    xscvspdpn 1, 0
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 31, 2
+; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    bl nearbyintf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xxswapd 0, 63
@@ -4851,14 +4851,14 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_nearby_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -80(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 96(1)
 ; PC64LE-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    fmr 30, 2
 ; PC64LE-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    fmr 31, 3
-; PC64LE-NEXT:    fmr 30, 2
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    bl nearbyint
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxlor 63, 1, 1
@@ -4886,11 +4886,11 @@ define <3 x double> @constrained_vector_nearby_v3f64(<3 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_nearby_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    std 0, 80(1)
 ; PC64LE9-NEXT:    stfd 30, 48(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, 56(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    fmr 31, 3
 ; PC64LE9-NEXT:    fmr 30, 2
 ; PC64LE9-NEXT:    bl nearbyint
@@ -4927,9 +4927,9 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
 ; PC64LE-LABEL: constrained_vector_nearbyint_v4f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stxvd2x 61, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
@@ -4972,8 +4972,8 @@ define <4 x double> @constrained_vector_nearbyint_v4f64(<4 x double> %x) #0 {
 ; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    stxv 62, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    vmr 30, 2
 ; PC64LE9-NEXT:    xscpsgndp 1, 62, 62
@@ -5019,8 +5019,8 @@ define <1 x float> @constrained_vector_maxnum_v1f32(<1 x float> %x, <1 x float>
 ; PC64LE-LABEL: constrained_vector_maxnum_v1f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl fmaxf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -5031,8 +5031,8 @@ define <1 x float> @constrained_vector_maxnum_v1f32(<1 x float> %x, <1 x float>
 ; PC64LE9-LABEL: constrained_vector_maxnum_v1f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl fmaxf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -5068,20 +5068,20 @@ define <3 x float> @constrained_vector_maxnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE-LABEL: constrained_vector_maxnum_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE-NEXT:    xxsldwi 2, 35, 35, 1
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    xscvspdpn 1, 0
+; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
-; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    bl fmaxf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 62
@@ -5121,18 +5121,18 @@ define <3 x float> @constrained_vector_maxnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE9-LABEL: constrained_vector_maxnum_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    xxsldwi 0, 35, 35, 1
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    xscvspdpn 2, 0
 ; PC64LE9-NEXT:    vmr 31, 3
 ; PC64LE9-NEXT:    vmr 30, 2
+; PC64LE9-NEXT:    xscvspdpn 2, 0
 ; PC64LE9-NEXT:    bl fmaxf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xxswapd 0, 62
@@ -5177,7 +5177,6 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-LABEL: constrained_vector_max_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -64(1)
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
@@ -5185,6 +5184,7 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 80(1)
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    fmr 2, 6
@@ -5206,7 +5206,6 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9-LABEL: constrained_vector_max_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    # kill: def $f5 killed $f5 def $vsl5
 ; PC64LE9-NEXT:    # kill: def $f4 killed $f4 def $vsl4
@@ -5214,6 +5213,7 @@ define <3 x double> @constrained_vector_max_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE9-NEXT:    xxmrghd 1, 2, 1
+; PC64LE9-NEXT:    std 0, 64(1)
 ; PC64LE9-NEXT:    fmr 2, 6
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    xvmaxdp 63, 1, 0
@@ -5261,8 +5261,8 @@ define <1 x float> @constrained_vector_minnum_v1f32(<1 x float> %x, <1 x float>
 ; PC64LE-LABEL: constrained_vector_minnum_v1f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    std 0, 48(1)
 ; PC64LE-NEXT:    bl fminf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    addi 1, 1, 32
@@ -5273,8 +5273,8 @@ define <1 x float> @constrained_vector_minnum_v1f32(<1 x float> %x, <1 x float>
 ; PC64LE9-LABEL: constrained_vector_minnum_v1f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    std 0, 48(1)
 ; PC64LE9-NEXT:    bl fminf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addi 1, 1, 32
@@ -5310,20 +5310,20 @@ define <3 x float> @constrained_vector_minnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE-LABEL: constrained_vector_minnum_v3f32:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -96(1)
 ; PC64LE-NEXT:    xxsldwi 0, 34, 34, 1
 ; PC64LE-NEXT:    xxsldwi 2, 35, 35, 1
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 112(1)
 ; PC64LE-NEXT:    stxvd2x 62, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    stfd 30, 80(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    xscvspdpn 1, 0
+; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
 ; PC64LE-NEXT:    xscvspdpn 2, 2
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    vmr 31, 3
-; PC64LE-NEXT:    vmr 30, 2
 ; PC64LE-NEXT:    bl fminf
 ; PC64LE-NEXT:    nop
 ; PC64LE-NEXT:    xxswapd 0, 62
@@ -5363,18 +5363,18 @@ define <3 x float> @constrained_vector_minnum_v3f32(<3 x float> %x, <3 x float>
 ; PC64LE9-LABEL: constrained_vector_minnum_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -80(1)
 ; PC64LE9-NEXT:    xxsldwi 0, 34, 34, 1
+; PC64LE9-NEXT:    std 0, 96(1)
 ; PC64LE9-NEXT:    stfd 30, 64(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
 ; PC64LE9-NEXT:    stxv 62, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    xscvspdpn 1, 0
 ; PC64LE9-NEXT:    xxsldwi 0, 35, 35, 1
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    xscvspdpn 2, 0
 ; PC64LE9-NEXT:    vmr 31, 3
 ; PC64LE9-NEXT:    vmr 30, 2
+; PC64LE9-NEXT:    xscvspdpn 2, 0
 ; PC64LE9-NEXT:    bl fminf
 ; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    xxswapd 0, 62
@@ -5419,7 +5419,6 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-LABEL: constrained_vector_min_v3f64:
 ; PC64LE:       # %bb.0: # %entry
 ; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
 ; PC64LE-NEXT:    stdu 1, -64(1)
 ; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE-NEXT:    # kill: def $f5 killed $f5 def $vsl5
@@ -5427,6 +5426,7 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE-NEXT:    # kill: def $f2 killed $f2 def $vsl2
 ; PC64LE-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    std 0, 80(1)
 ; PC64LE-NEXT:    xxmrghd 1, 2, 1
 ; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    fmr 2, 6
@@ -5448,7 +5448,6 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9-LABEL: constrained_vector_min_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
 ; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
 ; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    # kill: def $f5 killed $f5 def $vsl5
 ; PC64LE9-NEXT:    # kill: def $f4 killed $f4 def $vsl4
@@ -5456,6 +5455,7 @@ define <3 x double> @constrained_vector_min_v3f64(<3 x double> %x, <3 x double>
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
 ; PC64LE9-NEXT:    xxmrghd 0, 5, 4
 ; PC64LE9-NEXT:    xxmrghd 1, 2, 1
+; PC64LE9-NEXT:    std 0, 64(1)
 ; PC64LE9-NEXT:    fmr 2, 6
 ; PC64LE9-NEXT:    stxv 63, 32(1) # 16-byte Folded Spill
 ; PC64LE9-NEXT:    xvmindp 63, 1, 0

diff  --git a/llvm/test/CodeGen/PowerPC/vector-reduce-fadd.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-fadd.ll
index 20bfcfcccd399..edd3fb7b1754b 100644
--- a/llvm/test/CodeGen/PowerPC/vector-reduce-fadd.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-reduce-fadd.ll
@@ -3493,8 +3493,8 @@ define dso_local ppc_fp128 @v2ppcf128(<2 x ppc_fp128> %a) local_unnamed_addr #0
 ; PWR9LE-LABEL: v2ppcf128:
 ; PWR9LE:       # %bb.0: # %entry
 ; PWR9LE-NEXT:    mflr r0
-; PWR9LE-NEXT:    std r0, 16(r1)
 ; PWR9LE-NEXT:    stdu r1, -32(r1)
+; PWR9LE-NEXT:    std r0, 48(r1)
 ; PWR9LE-NEXT:    bl __gcc_qadd
 ; PWR9LE-NEXT:    nop
 ; PWR9LE-NEXT:    addi r1, r1, 32
@@ -3505,8 +3505,8 @@ define dso_local ppc_fp128 @v2ppcf128(<2 x ppc_fp128> %a) local_unnamed_addr #0
 ; PWR9BE-LABEL: v2ppcf128:
 ; PWR9BE:       # %bb.0: # %entry
 ; PWR9BE-NEXT:    mflr r0
-; PWR9BE-NEXT:    std r0, 16(r1)
 ; PWR9BE-NEXT:    stdu r1, -112(r1)
+; PWR9BE-NEXT:    std r0, 128(r1)
 ; PWR9BE-NEXT:    bl __gcc_qadd
 ; PWR9BE-NEXT:    nop
 ; PWR9BE-NEXT:    addi r1, r1, 112
@@ -3547,7 +3547,6 @@ define dso_local ppc_fp128 @v2ppcf128_b(<2 x ppc_fp128> %a, ppc_fp128 %b) local_
 ; PWR9LE-NEXT:    mflr r0
 ; PWR9LE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
 ; PWR9LE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; PWR9LE-NEXT:    std r0, 16(r1)
 ; PWR9LE-NEXT:    stdu r1, -48(r1)
 ; PWR9LE-NEXT:    fmr f31, f4
 ; PWR9LE-NEXT:    fmr f30, f3
@@ -3555,6 +3554,7 @@ define dso_local ppc_fp128 @v2ppcf128_b(<2 x ppc_fp128> %a, ppc_fp128 %b) local_
 ; PWR9LE-NEXT:    fmr f3, f1
 ; PWR9LE-NEXT:    fmr f1, f5
 ; PWR9LE-NEXT:    fmr f2, f6
+; PWR9LE-NEXT:    std r0, 64(r1)
 ; PWR9LE-NEXT:    bl __gcc_qadd
 ; PWR9LE-NEXT:    nop
 ; PWR9LE-NEXT:    fmr f3, f30
@@ -3571,8 +3571,8 @@ define dso_local ppc_fp128 @v2ppcf128_b(<2 x ppc_fp128> %a, ppc_fp128 %b) local_
 ; PWR9BE-LABEL: v2ppcf128_b:
 ; PWR9BE:       # %bb.0: # %entry
 ; PWR9BE-NEXT:    mflr r0
-; PWR9BE-NEXT:    std r0, 16(r1)
 ; PWR9BE-NEXT:    stdu r1, -128(r1)
+; PWR9BE-NEXT:    std r0, 144(r1)
 ; PWR9BE-NEXT:    stfd f30, 112(r1) # 8-byte Folded Spill
 ; PWR9BE-NEXT:    stfd f31, 120(r1) # 8-byte Folded Spill
 ; PWR9BE-NEXT:    fmr f31, f4
@@ -3652,8 +3652,8 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
 ; PWR9LE-LABEL: v2ppcf128_fast:
 ; PWR9LE:       # %bb.0: # %entry
 ; PWR9LE-NEXT:    mflr r0
-; PWR9LE-NEXT:    std r0, 16(r1)
 ; PWR9LE-NEXT:    stdu r1, -64(r1)
+; PWR9LE-NEXT:    std r0, 80(r1)
 ; PWR9LE-NEXT:    bl __gcc_qadd
 ; PWR9LE-NEXT:    nop
 ; PWR9LE-NEXT:    stfd f2, 40(r1)
@@ -3670,8 +3670,8 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
 ; PWR9BE-LABEL: v2ppcf128_fast:
 ; PWR9BE:       # %bb.0: # %entry
 ; PWR9BE-NEXT:    mflr r0
-; PWR9BE-NEXT:    std r0, 16(r1)
 ; PWR9BE-NEXT:    stdu r1, -144(r1)
+; PWR9BE-NEXT:    std r0, 160(r1)
 ; PWR9BE-NEXT:    bl __gcc_qadd
 ; PWR9BE-NEXT:    nop
 ; PWR9BE-NEXT:    stfd f2, 120(r1)
@@ -3732,8 +3732,8 @@ define dso_local ppc_fp128 @v4ppcf128(<4 x ppc_fp128> %a) local_unnamed_addr #0
 ; PWR9LE-NEXT:    stfd f29, -24(r1) # 8-byte Folded Spill
 ; PWR9LE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
 ; PWR9LE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; PWR9LE-NEXT:    std r0, 16(r1)
 ; PWR9LE-NEXT:    stdu r1, -64(r1)
+; PWR9LE-NEXT:    std r0, 80(r1)
 ; PWR9LE-NEXT:    fmr f31, f8
 ; PWR9LE-NEXT:    fmr f30, f7
 ; PWR9LE-NEXT:    fmr f29, f6
@@ -3760,8 +3760,8 @@ define dso_local ppc_fp128 @v4ppcf128(<4 x ppc_fp128> %a) local_unnamed_addr #0
 ; PWR9BE-LABEL: v4ppcf128:
 ; PWR9BE:       # %bb.0: # %entry
 ; PWR9BE-NEXT:    mflr r0
-; PWR9BE-NEXT:    std r0, 16(r1)
 ; PWR9BE-NEXT:    stdu r1, -144(r1)
+; PWR9BE-NEXT:    std r0, 160(r1)
 ; PWR9BE-NEXT:    stfd f28, 112(r1) # 8-byte Folded Spill
 ; PWR9BE-NEXT:    stfd f29, 120(r1) # 8-byte Folded Spill
 ; PWR9BE-NEXT:    stfd f30, 128(r1) # 8-byte Folded Spill
@@ -3864,7 +3864,6 @@ define dso_local ppc_fp128 @v4ppcf128_b(<4 x ppc_fp128> %a, ppc_fp128 %b) local_
 ; PWR9LE-NEXT:    stfd f29, -24(r1) # 8-byte Folded Spill
 ; PWR9LE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
 ; PWR9LE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; PWR9LE-NEXT:    std r0, 16(r1)
 ; PWR9LE-NEXT:    stdu r1, -80(r1)
 ; PWR9LE-NEXT:    fmr f27, f4
 ; PWR9LE-NEXT:    fmr f26, f3
@@ -3872,6 +3871,7 @@ define dso_local ppc_fp128 @v4ppcf128_b(<4 x ppc_fp128> %a, ppc_fp128 %b) local_
 ; PWR9LE-NEXT:    fmr f3, f1
 ; PWR9LE-NEXT:    fmr f1, f9
 ; PWR9LE-NEXT:    fmr f2, f10
+; PWR9LE-NEXT:    std r0, 96(r1)
 ; PWR9LE-NEXT:    fmr f31, f8
 ; PWR9LE-NEXT:    fmr f30, f7
 ; PWR9LE-NEXT:    fmr f29, f6
@@ -3904,8 +3904,8 @@ define dso_local ppc_fp128 @v4ppcf128_b(<4 x ppc_fp128> %a, ppc_fp128 %b) local_
 ; PWR9BE-LABEL: v4ppcf128_b:
 ; PWR9BE:       # %bb.0: # %entry
 ; PWR9BE-NEXT:    mflr r0
-; PWR9BE-NEXT:    std r0, 16(r1)
 ; PWR9BE-NEXT:    stdu r1, -160(r1)
+; PWR9BE-NEXT:    std r0, 176(r1)
 ; PWR9BE-NEXT:    stfd f26, 112(r1) # 8-byte Folded Spill
 ; PWR9BE-NEXT:    stfd f27, 120(r1) # 8-byte Folded Spill
 ; PWR9BE-NEXT:    fmr f27, f4
@@ -4049,12 +4049,12 @@ define dso_local ppc_fp128 @v4ppcf128_fast(<4 x ppc_fp128> %a) local_unnamed_add
 ; PWR9LE-NEXT:    stfd f29, -24(r1) # 8-byte Folded Spill
 ; PWR9LE-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
 ; PWR9LE-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
-; PWR9LE-NEXT:    std r0, 16(r1)
 ; PWR9LE-NEXT:    stdu r1, -96(r1)
 ; PWR9LE-NEXT:    fmr f29, f4
 ; PWR9LE-NEXT:    fmr f28, f3
 ; PWR9LE-NEXT:    fmr f3, f5
 ; PWR9LE-NEXT:    fmr f4, f6
+; PWR9LE-NEXT:    std r0, 112(r1)
 ; PWR9LE-NEXT:    fmr f31, f8
 ; PWR9LE-NEXT:    fmr f30, f7
 ; PWR9LE-NEXT:    bl __gcc_qadd
@@ -4093,8 +4093,8 @@ define dso_local ppc_fp128 @v4ppcf128_fast(<4 x ppc_fp128> %a) local_unnamed_add
 ; PWR9BE-LABEL: v4ppcf128_fast:
 ; PWR9BE:       # %bb.0: # %entry
 ; PWR9BE-NEXT:    mflr r0
-; PWR9BE-NEXT:    std r0, 16(r1)
 ; PWR9BE-NEXT:    stdu r1, -176(r1)
+; PWR9BE-NEXT:    std r0, 192(r1)
 ; PWR9BE-NEXT:    stfd f28, 144(r1) # 8-byte Folded Spill
 ; PWR9BE-NEXT:    stfd f29, 152(r1) # 8-byte Folded Spill
 ; PWR9BE-NEXT:    fmr f29, f4

diff  --git a/llvm/test/DebugInfo/XCOFF/explicit-section.ll b/llvm/test/DebugInfo/XCOFF/explicit-section.ll
index f5734e1704f9d..9a78c07bf62a6 100644
--- a/llvm/test/DebugInfo/XCOFF/explicit-section.ll
+++ b/llvm/test/DebugInfo/XCOFF/explicit-section.ll
@@ -94,9 +94,9 @@ entry:
 ; CHECK-NEXT:  # %bb.0:                                # %entry
 ; CHECK-NEXT:  L..tmp3:
 ; CHECK-NEXT:          mflr 0
-; CHECK-NEXT:          stw 0, 8(1)
 ; CHECK-NEXT:          stwu 1, -64(1)
 ; CHECK-NEXT:          li 3, 0
+; CHECK-NEXT:          stw 0, 72(1)
 ; CHECK-NEXT:          stw 3, 60(1)
 ; CHECK-NEXT:  L..tmp4:
 ; CHECK-NEXT:  L..tmp5:


        


More information about the llvm-commits mailing list