[llvm-branch-commits] [llvm] 04a6828 - [PowerPC] Make sure the first probe is full size or is the last probe when stack is realigned

Tom Stellard via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Jun 14 22:59:17 PDT 2021


Author: Kai Luo
Date: 2021-06-15T01:58:43-04:00
New Revision: 04a68288ded459c7e76135a9ee4b7e9d4bf4cdc2

URL: https://github.com/llvm/llvm-project/commit/04a68288ded459c7e76135a9ee4b7e9d4bf4cdc2
DIFF: https://github.com/llvm/llvm-project/commit/04a68288ded459c7e76135a9ee4b7e9d4bf4cdc2.diff

LOG: [PowerPC] Make sure the first probe is full size or is the last probe when stack is realigned

When `-fstack-clash-protection` is enabled and the stack has to be realigned, some parts of the red zone are written prior to the probe, so the probe might overwrite content already written in the red zone. To avoid this, we have to make sure the first probe is either at full probe size or is the last probe, so that we can skip the red zone.

It also fixes a violation of the PPC ABI where `r1` isn't updated atomically.

This fixes https://bugs.llvm.org/show_bug.cgi?id=49903.

Reviewed By: jsji

Differential Revision: https://reviews.llvm.org/D100290

(cherry picked from commit bf58600badb1138a501ad81b07298207a7a64b2a)

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
    llvm/test/CodeGen/PowerPC/pr46759.ll
    llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll
    llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 50ce11b8374fa..16536bf23debf 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -859,15 +859,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
     BuildMI(MBB, MBBI, dl,
             TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
                             : PPC::PROBED_STACKALLOC_32))
-        .addDef(ScratchReg)
-        .addDef(TempReg) // TempReg stores the old sp.
+        .addDef(TempReg)
+        .addDef(ScratchReg) // ScratchReg stores the old sp.
         .addImm(NegFrameSize);
     // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
     // update the ScratchReg to meet the assumption that ScratchReg contains
     // the NegFrameSize. This solution is rather tricky.
     if (!HasRedZone) {
       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
-          .addReg(TempReg)
+          .addReg(ScratchReg)
           .addReg(SPReg);
       HasSTUX = true;
     }
@@ -1187,7 +1187,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
 
 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
                                         MachineBasicBlock &PrologMBB) const {
-  // TODO: Generate CFI instructions.
   bool isPPC64 = Subtarget.isPPC64();
   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
@@ -1219,6 +1218,7 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
   bool HasBP = RegInfo->hasBasePointer(MF);
   Register BPReg = RegInfo->getBaseRegister(MF);
   Align MaxAlign = MFI.getMaxAlign();
+  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
   const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
   // Subroutines to generate .cfi_* directives.
   auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
@@ -1272,212 +1272,221 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
           .addReg(SPReg)
           .addReg(NegSizeReg);
   };
-  // Used to probe realignment gap [stackptr - (stackptr % align), stackptr)
-  // when HasBP && isPPC64. In such scenario, normally we have r0, r1, r12, r30
-  // available and r1 is already copied to r30 which is BPReg. So BPReg stores
-  // the value of stackptr.
-  // First we have to probe tail interval whose size is less than probesize,
-  // i.e., [stackptr - (stackptr % align) % probesize, stackptr). At this stage,
-  // ScratchReg stores the value of ((stackptr % align) % probesize). Then we
-  // probe each block sized probesize until stackptr meets
-  // (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized
-  // as negprobesize. At both stages, TempReg stores the value of
-  // (stackptr - (stackptr % align)).
-  auto dynamicProbe = [&](MachineBasicBlock &MBB,
-                          MachineBasicBlock::iterator MBBI, Register ScratchReg,
-                          Register TempReg) {
-    assert(HasBP && isPPC64 && "Probe alignment part not available");
+  // Used to probe stack when realignment is required.
+  // Note that the ABI requires *sp to always equal the back-chain pointer, so
+  // only st(w|d)u(x) can be used to update sp.
+  // Following is pseudo code:
+  // final_sp = (sp & align) + negframesize;
+  // neg_gap = final_sp - sp;
+  // while (neg_gap < negprobesize) {
+  //   stdu fp, negprobesize(sp);
+  //   neg_gap -= negprobesize;
+  // }
+  // stdux fp, sp, neg_gap
+  //
+  // When HasBP && HasRedZone, the back-chain pointer is already saved in BPReg
+  // before the probe code, so we don't need to save it and get one additional
+  // register that can be used to materialize the probe size if an X-form
+  // store is needed. Otherwise, we can NOT materialize the probe size, so we
+  // can only use D-form for now.
+  //
+  // The allocations are:
+  // if (HasBP && HasRedzone) {
+  //   r0: materialize the probesize if needed so that we can use xform.
+  //   r12: `neg_gap`
+  // } else {
+  //   r0: back-chain pointer
+  //   r12: `neg_gap`.
+  // }
+  auto probeRealignedStack = [&](MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MBBI,
+                                 Register ScratchReg, Register TempReg) {
+    assert(HasBP && "The function is supposed to have base pointer when its "
+                    "stack is realigned.");
     assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
-    // ScratchReg = stackptr % align
-    BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
-        .addReg(BPReg)
-        .addImm(0)
-        .addImm(64 - Log2(MaxAlign));
-    // TempReg = stackptr - (stackptr % align)
-    BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg)
-        .addReg(ScratchReg)
-        .addReg(BPReg);
-    // ScratchReg = (stackptr % align) % probesize
-    BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
-        .addReg(ScratchReg)
-        .addImm(0)
-        .addImm(64 - Log2(ProbeSize));
+
+    // FIXME: We can eliminate this limitation if we get more information about
+    // which parts of the red zone are already used. The used red zone can be
+    // treated as probed. But there might be `holes' in the probed red zone,
+    // which could complicate the implementation.
+    assert(ProbeSize >= Subtarget.getRedZoneSize() &&
+           "Probe size should be larger or equal to the size of red-zone so "
+           "that red-zone is not clobbered by probing.");
+
+    Register &FinalStackPtr = TempReg;
+    // FIXME: We only support NegProbeSize materializable by DForm currently.
+    // When HasBP && HasRedzone, we can use xform if we have an additional idle
+    // register.
+    NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
+    assert(isInt<16>(NegProbeSize) &&
+           "NegProbeSize should be materializable by DForm");
     Register CRReg = PPC::CR0;
-    // If (stackptr % align) % probesize == 0, we should not generate probe
-    // code. Layout of output assembly kinda like:
+    // Layout of output assembly kinda like:
     // bb.0:
     //   ...
-    //   cmpldi $scratchreg, 0
-    //   beq bb.2
-    // bb.1: # Probe tail interval
-    //   neg $scratchreg, $scratchreg
-    //   stdux $bpreg, r1, $scratchreg
+    //   sub $scratchreg, $finalsp, r1
+    //   cmpdi $scratchreg, <negprobesize>
+    //   bge bb.2
+    // bb.1:
+    //   stdu <backchain>, <negprobesize>(r1)
+    //   sub $scratchreg, $scratchreg, negprobesize
+    //   cmpdi $scratchreg, <negprobesize>
+    //   blt bb.1
     // bb.2:
-    //   <materialize negprobesize into $scratchreg>
-    //   cmpd r1, $tempreg
-    //   beq bb.4
-    // bb.3: # Loop to probe each block
-    //   stdux $bpreg, r1, $scratchreg
-    //   cmpd r1, $tempreg
-    //   bne bb.3
-    // bb.4:
-    //   ...
+    //   stdux <backchain>, r1, $scratchreg
     MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
-    MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB);
-    MF.insert(MBBInsertPoint, ProbeResidualMBB);
-    MachineBasicBlock *ProbeLoopPreHeaderMBB =
-        MF.CreateMachineBasicBlock(ProbedBB);
-    MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB);
     MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
     MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
     MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
     MF.insert(MBBInsertPoint, ProbeExitMBB);
-    // bb.4
-    ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
-    ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+    // bb.2
+    {
+      Register BackChainPointer = HasRedZone ? BPReg : TempReg;
+      allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
+                       BackChainPointer);
+      if (HasRedZone)
+        // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
+        // to TempReg to satisfy it.
+        BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
+            .addReg(BPReg)
+            .addReg(BPReg);
+      ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
+      ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+    }
     // bb.0
-    BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0);
-    BuildMI(&MBB, DL, TII.get(PPC::BCC))
-        .addImm(PPC::PRED_EQ)
-        .addReg(CRReg)
-        .addMBB(ProbeLoopPreHeaderMBB);
-    MBB.addSuccessor(ProbeResidualMBB);
-    MBB.addSuccessor(ProbeLoopPreHeaderMBB);
+    {
+      BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
+          .addReg(SPReg)
+          .addReg(FinalStackPtr);
+      if (!HasRedZone)
+        BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
+      BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
+          .addReg(ScratchReg)
+          .addImm(NegProbeSize);
+      BuildMI(&MBB, DL, TII.get(PPC::BCC))
+          .addImm(PPC::PRED_GE)
+          .addReg(CRReg)
+          .addMBB(ProbeExitMBB);
+      MBB.addSuccessor(ProbeLoopBodyMBB);
+      MBB.addSuccessor(ProbeExitMBB);
+    }
     // bb.1
-    BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg)
-        .addReg(ScratchReg);
-    allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg,
-                     false, BPReg);
-    ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB);
-    // bb.2
-    MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(),
-                   NegProbeSize, ScratchReg);
-    BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg)
-        .addReg(SPReg)
-        .addReg(TempReg);
-    BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC))
-        .addImm(PPC::PRED_EQ)
-        .addReg(CRReg)
-        .addMBB(ProbeExitMBB);
-    ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB);
-    ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB);
-    // bb.3
-    allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg,
-                     false, BPReg);
-    BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg)
-        .addReg(SPReg)
-        .addReg(TempReg);
-    BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
-        .addImm(PPC::PRED_NE)
-        .addReg(CRReg)
-        .addMBB(ProbeLoopBodyMBB);
-    ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
-    ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
+    {
+      Register BackChainPointer = HasRedZone ? BPReg : TempReg;
+      allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
+                       0, true /*UseDForm*/, BackChainPointer);
+      BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
+              ScratchReg)
+          .addReg(ScratchReg)
+          .addImm(-NegProbeSize);
+      BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
+              CRReg)
+          .addReg(ScratchReg)
+          .addImm(NegProbeSize);
+      BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
+          .addImm(PPC::PRED_LT)
+          .addReg(CRReg)
+          .addMBB(ProbeLoopBodyMBB);
+      ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
+      ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
+    }
     // Update liveins.
-    recomputeLiveIns(*ProbeResidualMBB);
-    recomputeLiveIns(*ProbeLoopPreHeaderMBB);
     recomputeLiveIns(*ProbeLoopBodyMBB);
     recomputeLiveIns(*ProbeExitMBB);
     return ProbeExitMBB;
   };
   // For case HasBP && MaxAlign > 1, we have to realign the SP by performing
-  // SP = SP - SP % MaxAlign.
+  // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since
+  // the offset subtracted from SP is determined by SP's runtime value.
   if (HasBP && MaxAlign > 1) {
-    // FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in
-    // 64-bit mode.
-    if (isPPC64) {
-      // Use BPReg to calculate CFA.
-      if (needsCFI)
-        buildDefCFA(*CurrentMBB, {MI}, BPReg, 0);
-      // Since we have SPReg copied to BPReg at the moment, FPReg can be used as
-      // TempReg.
-      Register TempReg = FPReg;
-      CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg);
-      // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64.
-      BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
-          .addReg(BPReg)
-          .addReg(BPReg);
-    } else {
-      // Initialize current frame pointer.
-      BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
+    // Calculate final stack pointer.
+    if (isPPC64)
+      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
           .addReg(SPReg)
-          .addReg(SPReg);
-      // Use FPReg to calculate CFA.
-      if (needsCFI)
-        buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
+          .addImm(0)
+          .addImm(64 - Log2(MaxAlign));
+    else
       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
-          .addReg(FPReg)
+          .addReg(SPReg)
           .addImm(0)
           .addImm(32 - Log2(MaxAlign))
           .addImm(31);
-      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg)
-          .addReg(ScratchReg)
-          .addReg(SPReg);
-    }
+    BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
+            FPReg)
+        .addReg(ScratchReg)
+        .addReg(SPReg);
+    MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
+    BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
+            FPReg)
+        .addReg(ScratchReg)
+        .addReg(FPReg);
+    CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
+    if (needsCFI)
+      buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
   } else {
     // Initialize current frame pointer.
     BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
     // Use FPReg to calculate CFA.
     if (needsCFI)
       buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
-  }
-  // Probe residual part.
-  if (NegResidualSize) {
-    bool ResidualUseDForm = CanUseDForm(NegResidualSize);
-    if (!ResidualUseDForm)
-      MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
-    allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
-                     ResidualUseDForm, FPReg);
-  }
-  bool UseDForm = CanUseDForm(NegProbeSize);
-  // If number of blocks is small, just probe them directly.
-  if (NumBlocks < 3) {
-    if (!UseDForm)
-      MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
-    for (int i = 0; i < NumBlocks; ++i)
-      allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
-                       FPReg);
-    if (needsCFI) {
-      // Restore using SPReg to calculate CFA.
-      buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
+    // Probe residual part.
+    if (NegResidualSize) {
+      bool ResidualUseDForm = CanUseDForm(NegResidualSize);
+      if (!ResidualUseDForm)
+        MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
+      allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
+                       ResidualUseDForm, FPReg);
     }
-  } else {
-    // Since CTR is a volatile register and current shrinkwrap implementation
-    // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
-    // CTR loop to probe.
-    // Calculate trip count and stores it in CTRReg.
-    MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
-    BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
-        .addReg(ScratchReg, RegState::Kill);
-    if (!UseDForm)
-      MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
-    // Create MBBs of the loop.
-    MachineFunction::iterator MBBInsertPoint =
-        std::next(CurrentMBB->getIterator());
-    MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
-    MF.insert(MBBInsertPoint, LoopMBB);
-    MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
-    MF.insert(MBBInsertPoint, ExitMBB);
-    // Synthesize the loop body.
-    allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
-                     UseDForm, FPReg);
-    BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
-        .addMBB(LoopMBB);
-    LoopMBB->addSuccessor(ExitMBB);
-    LoopMBB->addSuccessor(LoopMBB);
-    // Synthesize the exit MBB.
-    ExitMBB->splice(ExitMBB->end(), CurrentMBB,
-                    std::next(MachineBasicBlock::iterator(MI)),
-                    CurrentMBB->end());
-    ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
-    CurrentMBB->addSuccessor(LoopMBB);
-    if (needsCFI) {
-      // Restore using SPReg to calculate CFA.
-      buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
+    bool UseDForm = CanUseDForm(NegProbeSize);
+    // If number of blocks is small, just probe them directly.
+    if (NumBlocks < 3) {
+      if (!UseDForm)
+        MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
+      for (int i = 0; i < NumBlocks; ++i)
+        allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
+                         FPReg);
+      if (needsCFI) {
+        // Restore using SPReg to calculate CFA.
+        buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
+      }
+    } else {
+      // Since CTR is a volatile register and current shrinkwrap implementation
+      // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
+      // CTR loop to probe.
+      // Calculate trip count and stores it in CTRReg.
+      MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
+      BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
+          .addReg(ScratchReg, RegState::Kill);
+      if (!UseDForm)
+        MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
+      // Create MBBs of the loop.
+      MachineFunction::iterator MBBInsertPoint =
+          std::next(CurrentMBB->getIterator());
+      MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
+      MF.insert(MBBInsertPoint, LoopMBB);
+      MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
+      MF.insert(MBBInsertPoint, ExitMBB);
+      // Synthesize the loop body.
+      allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
+                       UseDForm, FPReg);
+      BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
+          .addMBB(LoopMBB);
+      LoopMBB->addSuccessor(ExitMBB);
+      LoopMBB->addSuccessor(LoopMBB);
+      // Synthesize the exit MBB.
+      ExitMBB->splice(ExitMBB->end(), CurrentMBB,
+                      std::next(MachineBasicBlock::iterator(MI)),
+                      CurrentMBB->end());
+      ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
+      CurrentMBB->addSuccessor(LoopMBB);
+      if (needsCFI) {
+        // Restore using SPReg to calculate CFA.
+        buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
+      }
+      // Update liveins.
+      recomputeLiveIns(*LoopMBB);
+      recomputeLiveIns(*ExitMBB);
     }
-    // Update liveins.
-    recomputeLiveIns(*LoopMBB);
-    recomputeLiveIns(*ExitMBB);
   }
   ++NumPrologProbed;
   MI.eraseFromParent();

diff  --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll
index 33b44b720b6e1..d6d02921efcaf 100644
--- a/llvm/test/CodeGen/PowerPC/pr46759.ll
+++ b/llvm/test/CodeGen/PowerPC/pr46759.ll
@@ -6,32 +6,26 @@
 define void @foo(i32 %vla_size) #0 {
 ; CHECK-LE-LABEL: foo:
 ; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    clrldi r12, r1, 53
 ; CHECK-LE-NEXT:    std r31, -8(r1)
 ; CHECK-LE-NEXT:    std r30, -16(r1)
 ; CHECK-LE-NEXT:    mr r30, r1
-; CHECK-LE-NEXT:    .cfi_def_cfa r30, 0
-; CHECK-LE-NEXT:    clrldi r0, r30, 53
-; CHECK-LE-NEXT:    subc r12, r30, r0
-; CHECK-LE-NEXT:    clrldi r0, r0, 52
-; CHECK-LE-NEXT:    cmpdi r0, 0
-; CHECK-LE-NEXT:    beq cr0, .LBB0_2
-; CHECK-LE-NEXT:  # %bb.1: # %entry
-; CHECK-LE-NEXT:    neg r0, r0
-; CHECK-LE-NEXT:    stdux r30, r1, r0
-; CHECK-LE-NEXT:  .LBB0_2: # %entry
-; CHECK-LE-NEXT:    li r0, -4096
-; CHECK-LE-NEXT:    cmpd r1, r12
-; CHECK-LE-NEXT:    beq cr0, .LBB0_4
-; CHECK-LE-NEXT:  .LBB0_3: # %entry
+; CHECK-LE-NEXT:    sub r0, r1, r12
+; CHECK-LE-NEXT:    li r12, -6144
+; CHECK-LE-NEXT:    add r0, r12, r0
+; CHECK-LE-NEXT:    sub r12, r0, r1
+; CHECK-LE-NEXT:    cmpdi r12, -4096
+; CHECK-LE-NEXT:    bge cr0, .LBB0_2
+; CHECK-LE-NEXT:  .LBB0_1: # %entry
 ; CHECK-LE-NEXT:    #
-; CHECK-LE-NEXT:    stdux r30, r1, r0
-; CHECK-LE-NEXT:    cmpd r1, r12
-; CHECK-LE-NEXT:    bne cr0, .LBB0_3
-; CHECK-LE-NEXT:  .LBB0_4: # %entry
-; CHECK-LE-NEXT:    mr r12, r30
-; CHECK-LE-NEXT:    stdu r12, -2048(r1)
-; CHECK-LE-NEXT:    stdu r12, -4096(r1)
-; CHECK-LE-NEXT:    .cfi_def_cfa_register r1
+; CHECK-LE-NEXT:    stdu r30, -4096(r1)
+; CHECK-LE-NEXT:    addi r12, r12, 4096
+; CHECK-LE-NEXT:    cmpdi r12, -4096
+; CHECK-LE-NEXT:    blt cr0, .LBB0_1
+; CHECK-LE-NEXT:  .LBB0_2: # %entry
+; CHECK-LE-NEXT:    stdux r30, r1, r12
+; CHECK-LE-NEXT:    mr r0, r30
+; CHECK-LE-NEXT:    .cfi_def_cfa_register r0
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r30
 ; CHECK-LE-NEXT:    .cfi_offset r31, -8
 ; CHECK-LE-NEXT:    .cfi_offset r30, -16
@@ -52,13 +46,13 @@ define void @foo(i32 %vla_size) #0 {
 ; CHECK-LE-NEXT:    add r4, r1, r4
 ; CHECK-LE-NEXT:    stdux r3, r1, r5
 ; CHECK-LE-NEXT:    cmpd r1, r4
-; CHECK-LE-NEXT:    beq cr0, .LBB0_6
-; CHECK-LE-NEXT:  .LBB0_5: # %entry
+; CHECK-LE-NEXT:    beq cr0, .LBB0_4
+; CHECK-LE-NEXT:  .LBB0_3: # %entry
 ; CHECK-LE-NEXT:    #
 ; CHECK-LE-NEXT:    stdu r3, -4096(r1)
 ; CHECK-LE-NEXT:    cmpd r1, r4
-; CHECK-LE-NEXT:    bne cr0, .LBB0_5
-; CHECK-LE-NEXT:  .LBB0_6: # %entry
+; CHECK-LE-NEXT:    bne cr0, .LBB0_3
+; CHECK-LE-NEXT:  .LBB0_4: # %entry
 ; CHECK-LE-NEXT:    addi r3, r1, 2048
 ; CHECK-LE-NEXT:    lbz r3, 0(r3)
 ; CHECK-LE-NEXT:    mr r1, r30

diff  --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll
index e595d8a732a5c..4a8de768d82a3 100644
--- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll
@@ -44,12 +44,12 @@ entry:
 define i8 @f1() #0 "stack-probe-size"="0" nounwind {
 ; CHECK-LE-LABEL: f1:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    mr r12, r1
-; CHECK-LE-NEXT:    li r0, 259
-; CHECK-LE-NEXT:    mtctr r0
+; CHECK-LE-NEXT:    mr r0, r1
+; CHECK-LE-NEXT:    li r12, 259
+; CHECK-LE-NEXT:    mtctr r12
 ; CHECK-LE-NEXT:  .LBB1_1: # %entry
 ; CHECK-LE-NEXT:    #
-; CHECK-LE-NEXT:    stdu r12, -16(r1)
+; CHECK-LE-NEXT:    stdu r0, -16(r1)
 ; CHECK-LE-NEXT:    bdnz .LBB1_1
 ; CHECK-LE-NEXT:  # %bb.2: # %entry
 ; CHECK-LE-NEXT:    li r3, 3
@@ -60,12 +60,12 @@ define i8 @f1() #0 "stack-probe-size"="0" nounwind {
 ;
 ; CHECK-BE-LABEL: f1:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mr r12, r1
-; CHECK-BE-NEXT:    li r0, 260
-; CHECK-BE-NEXT:    mtctr r0
+; CHECK-BE-NEXT:    mr r0, r1
+; CHECK-BE-NEXT:    li r12, 260
+; CHECK-BE-NEXT:    mtctr r12
 ; CHECK-BE-NEXT:  .LBB1_1: # %entry
 ; CHECK-BE-NEXT:    #
-; CHECK-BE-NEXT:    stdu r12, -16(r1)
+; CHECK-BE-NEXT:    stdu r0, -16(r1)
 ; CHECK-BE-NEXT:    bdnz .LBB1_1
 ; CHECK-BE-NEXT:  # %bb.2: # %entry
 ; CHECK-BE-NEXT:    li r3, 3
@@ -76,16 +76,16 @@ define i8 @f1() #0 "stack-probe-size"="0" nounwind {
 ;
 ; CHECK-32-LABEL: f1:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    mr r12, r1
-; CHECK-32-NEXT:    li r0, 257
-; CHECK-32-NEXT:    mtctr r0
+; CHECK-32-NEXT:    mr r0, r1
+; CHECK-32-NEXT:    li r12, 257
+; CHECK-32-NEXT:    mtctr r12
 ; CHECK-32-NEXT:  .LBB1_1: # %entry
 ; CHECK-32-NEXT:    #
-; CHECK-32-NEXT:    stwu r12, -16(r1)
+; CHECK-32-NEXT:    stwu r0, -16(r1)
 ; CHECK-32-NEXT:    bdnz .LBB1_1
 ; CHECK-32-NEXT:  # %bb.2: # %entry
 ; CHECK-32-NEXT:    li r3, 3
-; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    stb r3, 16(r1)
 ; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    lbz r3, 16(r1)
@@ -102,13 +102,13 @@ entry:
 define i8 @f2() #0 nounwind {
 ; CHECK-LE-LABEL: f2:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    mr r12, r1
-; CHECK-LE-NEXT:    stdu r12, -48(r1)
-; CHECK-LE-NEXT:    li r0, 16
-; CHECK-LE-NEXT:    mtctr r0
+; CHECK-LE-NEXT:    mr r0, r1
+; CHECK-LE-NEXT:    stdu r0, -48(r1)
+; CHECK-LE-NEXT:    li r12, 16
+; CHECK-LE-NEXT:    mtctr r12
 ; CHECK-LE-NEXT:  .LBB2_1: # %entry
 ; CHECK-LE-NEXT:    #
-; CHECK-LE-NEXT:    stdu r12, -4096(r1)
+; CHECK-LE-NEXT:    stdu r0, -4096(r1)
 ; CHECK-LE-NEXT:    bdnz .LBB2_1
 ; CHECK-LE-NEXT:  # %bb.2: # %entry
 ; CHECK-LE-NEXT:    li r3, 3
@@ -119,13 +119,13 @@ define i8 @f2() #0 nounwind {
 ;
 ; CHECK-BE-LABEL: f2:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mr r12, r1
-; CHECK-BE-NEXT:    stdu r12, -64(r1)
-; CHECK-BE-NEXT:    li r0, 16
-; CHECK-BE-NEXT:    mtctr r0
+; CHECK-BE-NEXT:    mr r0, r1
+; CHECK-BE-NEXT:    stdu r0, -64(r1)
+; CHECK-BE-NEXT:    li r12, 16
+; CHECK-BE-NEXT:    mtctr r12
 ; CHECK-BE-NEXT:  .LBB2_1: # %entry
 ; CHECK-BE-NEXT:    #
-; CHECK-BE-NEXT:    stdu r12, -4096(r1)
+; CHECK-BE-NEXT:    stdu r0, -4096(r1)
 ; CHECK-BE-NEXT:    bdnz .LBB2_1
 ; CHECK-BE-NEXT:  # %bb.2: # %entry
 ; CHECK-BE-NEXT:    li r3, 3
@@ -136,16 +136,16 @@ define i8 @f2() #0 nounwind {
 ;
 ; CHECK-32-LABEL: f2:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    mr r12, r1
-; CHECK-32-NEXT:    stwu r12, -16(r1)
-; CHECK-32-NEXT:    li r0, 16
-; CHECK-32-NEXT:    mtctr r0
+; CHECK-32-NEXT:    mr r0, r1
+; CHECK-32-NEXT:    stwu r0, -16(r1)
+; CHECK-32-NEXT:    li r12, 16
+; CHECK-32-NEXT:    mtctr r12
 ; CHECK-32-NEXT:  .LBB2_1: # %entry
 ; CHECK-32-NEXT:    #
-; CHECK-32-NEXT:    stwu r12, -4096(r1)
+; CHECK-32-NEXT:    stwu r0, -4096(r1)
 ; CHECK-32-NEXT:    bdnz .LBB2_1
 ; CHECK-32-NEXT:  # %bb.2: # %entry
-; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    stb r3, 16(r1)
@@ -166,10 +166,10 @@ entry:
 define i8 @f3() #0 "stack-probe-size"="32768" nounwind {
 ; CHECK-LE-LABEL: f3:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    mr r12, r1
-; CHECK-LE-NEXT:    stdu r12, -48(r1)
-; CHECK-LE-NEXT:    stdu r12, -32768(r1)
-; CHECK-LE-NEXT:    stdu r12, -32768(r1)
+; CHECK-LE-NEXT:    mr r0, r1
+; CHECK-LE-NEXT:    stdu r0, -48(r1)
+; CHECK-LE-NEXT:    stdu r0, -32768(r1)
+; CHECK-LE-NEXT:    stdu r0, -32768(r1)
 ; CHECK-LE-NEXT:    li r3, 3
 ; CHECK-LE-NEXT:    stb r3, 48(r1)
 ; CHECK-LE-NEXT:    lbz r3, 48(r1)
@@ -178,10 +178,10 @@ define i8 @f3() #0 "stack-probe-size"="32768" nounwind {
 ;
 ; CHECK-BE-LABEL: f3:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mr r12, r1
-; CHECK-BE-NEXT:    stdu r12, -64(r1)
-; CHECK-BE-NEXT:    stdu r12, -32768(r1)
-; CHECK-BE-NEXT:    stdu r12, -32768(r1)
+; CHECK-BE-NEXT:    mr r0, r1
+; CHECK-BE-NEXT:    stdu r0, -64(r1)
+; CHECK-BE-NEXT:    stdu r0, -32768(r1)
+; CHECK-BE-NEXT:    stdu r0, -32768(r1)
 ; CHECK-BE-NEXT:    li r3, 3
 ; CHECK-BE-NEXT:    stb r3, 64(r1)
 ; CHECK-BE-NEXT:    lbz r3, 64(r1)
@@ -190,11 +190,11 @@ define i8 @f3() #0 "stack-probe-size"="32768" nounwind {
 ;
 ; CHECK-32-LABEL: f3:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    mr r12, r1
-; CHECK-32-NEXT:    stwu r12, -16(r1)
-; CHECK-32-NEXT:    stwu r12, -32768(r1)
-; CHECK-32-NEXT:    stwu r12, -32768(r1)
-; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    mr r0, r1
+; CHECK-32-NEXT:    stwu r0, -16(r1)
+; CHECK-32-NEXT:    stwu r0, -32768(r1)
+; CHECK-32-NEXT:    stwu r0, -32768(r1)
+; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    stb r3, 16(r1)
@@ -261,15 +261,15 @@ entry:
 define i8 @f5() #0 "stack-probe-size"="65536" nounwind {
 ; CHECK-LE-LABEL: f5:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    mr r12, r1
-; CHECK-LE-NEXT:    stdu r12, -48(r1)
-; CHECK-LE-NEXT:    li r0, 16
-; CHECK-LE-NEXT:    mtctr r0
-; CHECK-LE-NEXT:    lis r0, -1
-; CHECK-LE-NEXT:    nop
+; CHECK-LE-NEXT:    mr r0, r1
+; CHECK-LE-NEXT:    stdu r0, -48(r1)
+; CHECK-LE-NEXT:    li r12, 16
+; CHECK-LE-NEXT:    mtctr r12
+; CHECK-LE-NEXT:    lis r12, -1
+; CHECK-LE-NEXT:    ori r12, r12, 0
 ; CHECK-LE-NEXT:  .LBB5_1: # %entry
 ; CHECK-LE-NEXT:    #
-; CHECK-LE-NEXT:    stdux r12, r1, r0
+; CHECK-LE-NEXT:    stdux r0, r1, r12
 ; CHECK-LE-NEXT:    bdnz .LBB5_1
 ; CHECK-LE-NEXT:  # %bb.2: # %entry
 ; CHECK-LE-NEXT:    li r3, 3
@@ -280,15 +280,15 @@ define i8 @f5() #0 "stack-probe-size"="65536" nounwind {
 ;
 ; CHECK-BE-LABEL: f5:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mr r12, r1
-; CHECK-BE-NEXT:    stdu r12, -64(r1)
-; CHECK-BE-NEXT:    li r0, 16
-; CHECK-BE-NEXT:    mtctr r0
-; CHECK-BE-NEXT:    lis r0, -1
-; CHECK-BE-NEXT:    nop
+; CHECK-BE-NEXT:    mr r0, r1
+; CHECK-BE-NEXT:    stdu r0, -64(r1)
+; CHECK-BE-NEXT:    li r12, 16
+; CHECK-BE-NEXT:    mtctr r12
+; CHECK-BE-NEXT:    lis r12, -1
+; CHECK-BE-NEXT:    ori r12, r12, 0
 ; CHECK-BE-NEXT:  .LBB5_1: # %entry
 ; CHECK-BE-NEXT:    #
-; CHECK-BE-NEXT:    stdux r12, r1, r0
+; CHECK-BE-NEXT:    stdux r0, r1, r12
 ; CHECK-BE-NEXT:    bdnz .LBB5_1
 ; CHECK-BE-NEXT:  # %bb.2: # %entry
 ; CHECK-BE-NEXT:    li r3, 3
@@ -299,18 +299,18 @@ define i8 @f5() #0 "stack-probe-size"="65536" nounwind {
 ;
 ; CHECK-32-LABEL: f5:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    mr r12, r1
-; CHECK-32-NEXT:    stwu r12, -16(r1)
-; CHECK-32-NEXT:    li r0, 16
-; CHECK-32-NEXT:    mtctr r0
-; CHECK-32-NEXT:    lis r0, -1
-; CHECK-32-NEXT:    nop
+; CHECK-32-NEXT:    mr r0, r1
+; CHECK-32-NEXT:    stwu r0, -16(r1)
+; CHECK-32-NEXT:    li r12, 16
+; CHECK-32-NEXT:    mtctr r12
+; CHECK-32-NEXT:    lis r12, -1
+; CHECK-32-NEXT:    ori r12, r12, 0
 ; CHECK-32-NEXT:  .LBB5_1: # %entry
 ; CHECK-32-NEXT:    #
-; CHECK-32-NEXT:    stwux r12, r1, r0
+; CHECK-32-NEXT:    stwux r0, r1, r12
 ; CHECK-32-NEXT:    bdnz .LBB5_1
 ; CHECK-32-NEXT:  # %bb.2: # %entry
-; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    stb r3, 16(r1)
@@ -331,14 +331,14 @@ entry:
 define i8 @f6() #0 nounwind {
 ; CHECK-LE-LABEL: f6:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    mr r12, r1
-; CHECK-LE-NEXT:    stdu r12, -48(r1)
-; CHECK-LE-NEXT:    lis r0, 4
-; CHECK-LE-NEXT:    nop
-; CHECK-LE-NEXT:    mtctr r0
+; CHECK-LE-NEXT:    mr r0, r1
+; CHECK-LE-NEXT:    stdu r0, -48(r1)
+; CHECK-LE-NEXT:    lis r12, 4
+; CHECK-LE-NEXT:    ori r12, r12, 0
+; CHECK-LE-NEXT:    mtctr r12
 ; CHECK-LE-NEXT:  .LBB6_1: # %entry
 ; CHECK-LE-NEXT:    #
-; CHECK-LE-NEXT:    stdu r12, -4096(r1)
+; CHECK-LE-NEXT:    stdu r0, -4096(r1)
 ; CHECK-LE-NEXT:    bdnz .LBB6_1
 ; CHECK-LE-NEXT:  # %bb.2: # %entry
 ; CHECK-LE-NEXT:    li r3, 3
@@ -349,14 +349,14 @@ define i8 @f6() #0 nounwind {
 ;
 ; CHECK-BE-LABEL: f6:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mr r12, r1
-; CHECK-BE-NEXT:    stdu r12, -64(r1)
-; CHECK-BE-NEXT:    lis r0, 4
-; CHECK-BE-NEXT:    nop
-; CHECK-BE-NEXT:    mtctr r0
+; CHECK-BE-NEXT:    mr r0, r1
+; CHECK-BE-NEXT:    stdu r0, -64(r1)
+; CHECK-BE-NEXT:    lis r12, 4
+; CHECK-BE-NEXT:    ori r12, r12, 0
+; CHECK-BE-NEXT:    mtctr r12
 ; CHECK-BE-NEXT:  .LBB6_1: # %entry
 ; CHECK-BE-NEXT:    #
-; CHECK-BE-NEXT:    stdu r12, -4096(r1)
+; CHECK-BE-NEXT:    stdu r0, -4096(r1)
 ; CHECK-BE-NEXT:    bdnz .LBB6_1
 ; CHECK-BE-NEXT:  # %bb.2: # %entry
 ; CHECK-BE-NEXT:    li r3, 3
@@ -367,17 +367,17 @@ define i8 @f6() #0 nounwind {
 ;
 ; CHECK-32-LABEL: f6:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    mr r12, r1
-; CHECK-32-NEXT:    stwu r12, -16(r1)
-; CHECK-32-NEXT:    lis r0, 4
-; CHECK-32-NEXT:    nop
-; CHECK-32-NEXT:    mtctr r0
+; CHECK-32-NEXT:    mr r0, r1
+; CHECK-32-NEXT:    stwu r0, -16(r1)
+; CHECK-32-NEXT:    lis r12, 4
+; CHECK-32-NEXT:    ori r12, r12, 0
+; CHECK-32-NEXT:    mtctr r12
 ; CHECK-32-NEXT:  .LBB6_1: # %entry
 ; CHECK-32-NEXT:    #
-; CHECK-32-NEXT:    stwu r12, -4096(r1)
+; CHECK-32-NEXT:    stwu r0, -4096(r1)
 ; CHECK-32-NEXT:    bdnz .LBB6_1
 ; CHECK-32-NEXT:  # %bb.2: # %entry
-; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    stb r3, 16(r1)
@@ -398,17 +398,17 @@ entry:
 define i8 @f7() #0 "stack-probe-size"="65536" nounwind {
 ; CHECK-LE-LABEL: f7:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    lis r0, -1
-; CHECK-LE-NEXT:    mr r12, r1
-; CHECK-LE-NEXT:    ori r0, r0, 13776
-; CHECK-LE-NEXT:    stdux r12, r1, r0
-; CHECK-LE-NEXT:    li r0, 15258
-; CHECK-LE-NEXT:    mtctr r0
-; CHECK-LE-NEXT:    lis r0, -1
-; CHECK-LE-NEXT:    nop
+; CHECK-LE-NEXT:    lis r12, -1
+; CHECK-LE-NEXT:    mr r0, r1
+; CHECK-LE-NEXT:    ori r12, r12, 13776
+; CHECK-LE-NEXT:    stdux r0, r1, r12
+; CHECK-LE-NEXT:    li r12, 15258
+; CHECK-LE-NEXT:    mtctr r12
+; CHECK-LE-NEXT:    lis r12, -1
+; CHECK-LE-NEXT:    ori r12, r12, 0
 ; CHECK-LE-NEXT:  .LBB7_1: # %entry
 ; CHECK-LE-NEXT:    #
-; CHECK-LE-NEXT:    stdux r12, r1, r0
+; CHECK-LE-NEXT:    stdux r0, r1, r12
 ; CHECK-LE-NEXT:    bdnz .LBB7_1
 ; CHECK-LE-NEXT:  # %bb.2: # %entry
 ; CHECK-LE-NEXT:    li r3, 3
@@ -419,17 +419,17 @@ define i8 @f7() #0 "stack-probe-size"="65536" nounwind {
 ;
 ; CHECK-BE-LABEL: f7:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    lis r0, -1
-; CHECK-BE-NEXT:    mr r12, r1
-; CHECK-BE-NEXT:    ori r0, r0, 13760
-; CHECK-BE-NEXT:    stdux r12, r1, r0
-; CHECK-BE-NEXT:    li r0, 15258
-; CHECK-BE-NEXT:    mtctr r0
-; CHECK-BE-NEXT:    lis r0, -1
-; CHECK-BE-NEXT:    nop
+; CHECK-BE-NEXT:    lis r12, -1
+; CHECK-BE-NEXT:    mr r0, r1
+; CHECK-BE-NEXT:    ori r12, r12, 13760
+; CHECK-BE-NEXT:    stdux r0, r1, r12
+; CHECK-BE-NEXT:    li r12, 15258
+; CHECK-BE-NEXT:    mtctr r12
+; CHECK-BE-NEXT:    lis r12, -1
+; CHECK-BE-NEXT:    ori r12, r12, 0
 ; CHECK-BE-NEXT:  .LBB7_1: # %entry
 ; CHECK-BE-NEXT:    #
-; CHECK-BE-NEXT:    stdux r12, r1, r0
+; CHECK-BE-NEXT:    stdux r0, r1, r12
 ; CHECK-BE-NEXT:    bdnz .LBB7_1
 ; CHECK-BE-NEXT:  # %bb.2: # %entry
 ; CHECK-BE-NEXT:    li r3, 3
@@ -440,20 +440,20 @@ define i8 @f7() #0 "stack-probe-size"="65536" nounwind {
 ;
 ; CHECK-32-LABEL: f7:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    lis r0, -1
-; CHECK-32-NEXT:    mr r12, r1
-; CHECK-32-NEXT:    ori r0, r0, 13808
-; CHECK-32-NEXT:    stwux r12, r1, r0
-; CHECK-32-NEXT:    li r0, 15258
-; CHECK-32-NEXT:    mtctr r0
-; CHECK-32-NEXT:    lis r0, -1
-; CHECK-32-NEXT:    nop
+; CHECK-32-NEXT:    lis r12, -1
+; CHECK-32-NEXT:    mr r0, r1
+; CHECK-32-NEXT:    ori r12, r12, 13808
+; CHECK-32-NEXT:    stwux r0, r1, r12
+; CHECK-32-NEXT:    li r12, 15258
+; CHECK-32-NEXT:    mtctr r12
+; CHECK-32-NEXT:    lis r12, -1
+; CHECK-32-NEXT:    ori r12, r12, 0
 ; CHECK-32-NEXT:  .LBB7_1: # %entry
 ; CHECK-32-NEXT:    #
-; CHECK-32-NEXT:    stwux r12, r1, r0
+; CHECK-32-NEXT:    stwux r0, r1, r12
 ; CHECK-32-NEXT:    bdnz .LBB7_1
 ; CHECK-32-NEXT:  # %bb.2: # %entry
-; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    stb r3, 9(r1)

diff  --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
index 6443059c97046..7e4556c597379 100644
--- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
@@ -44,13 +44,13 @@ entry:
 define i8 @f1() #0 "stack-probe-size"="0" {
 ; CHECK-LE-LABEL: f1:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    mr r12, r1
-; CHECK-LE-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-LE-NEXT:    li r0, 259
-; CHECK-LE-NEXT:    mtctr r0
+; CHECK-LE-NEXT:    mr r0, r1
+; CHECK-LE-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-LE-NEXT:    li r12, 259
+; CHECK-LE-NEXT:    mtctr r12
 ; CHECK-LE-NEXT:  .LBB1_1: # %entry
 ; CHECK-LE-NEXT:    #
-; CHECK-LE-NEXT:    stdu r12, -16(r1)
+; CHECK-LE-NEXT:    stdu r0, -16(r1)
 ; CHECK-LE-NEXT:    bdnz .LBB1_1
 ; CHECK-LE-NEXT:  # %bb.2: # %entry
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r1
@@ -63,13 +63,13 @@ define i8 @f1() #0 "stack-probe-size"="0" {
 ;
 ; CHECK-BE-LABEL: f1:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mr r12, r1
-; CHECK-BE-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-BE-NEXT:    li r0, 260
-; CHECK-BE-NEXT:    mtctr r0
+; CHECK-BE-NEXT:    mr r0, r1
+; CHECK-BE-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-BE-NEXT:    li r12, 260
+; CHECK-BE-NEXT:    mtctr r12
 ; CHECK-BE-NEXT:  .LBB1_1: # %entry
 ; CHECK-BE-NEXT:    #
-; CHECK-BE-NEXT:    stdu r12, -16(r1)
+; CHECK-BE-NEXT:    stdu r0, -16(r1)
 ; CHECK-BE-NEXT:    bdnz .LBB1_1
 ; CHECK-BE-NEXT:  # %bb.2: # %entry
 ; CHECK-BE-NEXT:    .cfi_def_cfa_register r1
@@ -82,17 +82,17 @@ define i8 @f1() #0 "stack-probe-size"="0" {
 ;
 ; CHECK-32-LABEL: f1:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    mr r12, r1
-; CHECK-32-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-32-NEXT:    li r0, 257
-; CHECK-32-NEXT:    mtctr r0
+; CHECK-32-NEXT:    mr r0, r1
+; CHECK-32-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-32-NEXT:    li r12, 257
+; CHECK-32-NEXT:    mtctr r12
 ; CHECK-32-NEXT:  .LBB1_1: # %entry
 ; CHECK-32-NEXT:    #
-; CHECK-32-NEXT:    stwu r12, -16(r1)
+; CHECK-32-NEXT:    stwu r0, -16(r1)
 ; CHECK-32-NEXT:    bdnz .LBB1_1
 ; CHECK-32-NEXT:  # %bb.2: # %entry
 ; CHECK-32-NEXT:    .cfi_def_cfa_register r1
-; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    .cfi_def_cfa_offset 4112
 ; CHECK-32-NEXT:    li r3, 3
@@ -111,14 +111,14 @@ entry:
 define i8 @f2() #0 {
 ; CHECK-LE-LABEL: f2:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    mr r12, r1
-; CHECK-LE-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-LE-NEXT:    stdu r12, -48(r1)
-; CHECK-LE-NEXT:    li r0, 16
-; CHECK-LE-NEXT:    mtctr r0
+; CHECK-LE-NEXT:    mr r0, r1
+; CHECK-LE-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-LE-NEXT:    stdu r0, -48(r1)
+; CHECK-LE-NEXT:    li r12, 16
+; CHECK-LE-NEXT:    mtctr r12
 ; CHECK-LE-NEXT:  .LBB2_1: # %entry
 ; CHECK-LE-NEXT:    #
-; CHECK-LE-NEXT:    stdu r12, -4096(r1)
+; CHECK-LE-NEXT:    stdu r0, -4096(r1)
 ; CHECK-LE-NEXT:    bdnz .LBB2_1
 ; CHECK-LE-NEXT:  # %bb.2: # %entry
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r1
@@ -131,14 +131,14 @@ define i8 @f2() #0 {
 ;
 ; CHECK-BE-LABEL: f2:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mr r12, r1
-; CHECK-BE-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-BE-NEXT:    stdu r12, -64(r1)
-; CHECK-BE-NEXT:    li r0, 16
-; CHECK-BE-NEXT:    mtctr r0
+; CHECK-BE-NEXT:    mr r0, r1
+; CHECK-BE-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-BE-NEXT:    stdu r0, -64(r1)
+; CHECK-BE-NEXT:    li r12, 16
+; CHECK-BE-NEXT:    mtctr r12
 ; CHECK-BE-NEXT:  .LBB2_1: # %entry
 ; CHECK-BE-NEXT:    #
-; CHECK-BE-NEXT:    stdu r12, -4096(r1)
+; CHECK-BE-NEXT:    stdu r0, -4096(r1)
 ; CHECK-BE-NEXT:    bdnz .LBB2_1
 ; CHECK-BE-NEXT:  # %bb.2: # %entry
 ; CHECK-BE-NEXT:    .cfi_def_cfa_register r1
@@ -151,18 +151,18 @@ define i8 @f2() #0 {
 ;
 ; CHECK-32-LABEL: f2:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    mr r12, r1
-; CHECK-32-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-32-NEXT:    stwu r12, -16(r1)
-; CHECK-32-NEXT:    li r0, 16
-; CHECK-32-NEXT:    mtctr r0
+; CHECK-32-NEXT:    mr r0, r1
+; CHECK-32-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-32-NEXT:    stwu r0, -16(r1)
+; CHECK-32-NEXT:    li r12, 16
+; CHECK-32-NEXT:    mtctr r12
 ; CHECK-32-NEXT:  .LBB2_1: # %entry
 ; CHECK-32-NEXT:    #
-; CHECK-32-NEXT:    stwu r12, -4096(r1)
+; CHECK-32-NEXT:    stwu r0, -4096(r1)
 ; CHECK-32-NEXT:    bdnz .LBB2_1
 ; CHECK-32-NEXT:  # %bb.2: # %entry
 ; CHECK-32-NEXT:    .cfi_def_cfa_register r1
-; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    .cfi_def_cfa_offset 65552
 ; CHECK-32-NEXT:    li r3, 3
@@ -184,11 +184,11 @@ entry:
 define i8 @f3() #0 "stack-probe-size"="32768" {
 ; CHECK-LE-LABEL: f3:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    mr r12, r1
-; CHECK-LE-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-LE-NEXT:    stdu r12, -48(r1)
-; CHECK-LE-NEXT:    stdu r12, -32768(r1)
-; CHECK-LE-NEXT:    stdu r12, -32768(r1)
+; CHECK-LE-NEXT:    mr r0, r1
+; CHECK-LE-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-LE-NEXT:    stdu r0, -48(r1)
+; CHECK-LE-NEXT:    stdu r0, -32768(r1)
+; CHECK-LE-NEXT:    stdu r0, -32768(r1)
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r1
 ; CHECK-LE-NEXT:    .cfi_def_cfa_offset 65584
 ; CHECK-LE-NEXT:    li r3, 3
@@ -199,11 +199,11 @@ define i8 @f3() #0 "stack-probe-size"="32768" {
 ;
 ; CHECK-BE-LABEL: f3:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mr r12, r1
-; CHECK-BE-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-BE-NEXT:    stdu r12, -64(r1)
-; CHECK-BE-NEXT:    stdu r12, -32768(r1)
-; CHECK-BE-NEXT:    stdu r12, -32768(r1)
+; CHECK-BE-NEXT:    mr r0, r1
+; CHECK-BE-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-BE-NEXT:    stdu r0, -64(r1)
+; CHECK-BE-NEXT:    stdu r0, -32768(r1)
+; CHECK-BE-NEXT:    stdu r0, -32768(r1)
 ; CHECK-BE-NEXT:    .cfi_def_cfa_register r1
 ; CHECK-BE-NEXT:    .cfi_def_cfa_offset 65600
 ; CHECK-BE-NEXT:    li r3, 3
@@ -214,13 +214,13 @@ define i8 @f3() #0 "stack-probe-size"="32768" {
 ;
 ; CHECK-32-LABEL: f3:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    mr r12, r1
-; CHECK-32-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-32-NEXT:    stwu r12, -16(r1)
-; CHECK-32-NEXT:    stwu r12, -32768(r1)
-; CHECK-32-NEXT:    stwu r12, -32768(r1)
+; CHECK-32-NEXT:    mr r0, r1
+; CHECK-32-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-32-NEXT:    stwu r0, -16(r1)
+; CHECK-32-NEXT:    stwu r0, -32768(r1)
+; CHECK-32-NEXT:    stwu r0, -32768(r1)
 ; CHECK-32-NEXT:    .cfi_def_cfa_register r1
-; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    .cfi_def_cfa_offset 65552
 ; CHECK-32-NEXT:    li r3, 3
@@ -291,16 +291,16 @@ entry:
 define i8 @f5() #0 "stack-probe-size"="65536" {
 ; CHECK-LE-LABEL: f5:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    mr r12, r1
-; CHECK-LE-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-LE-NEXT:    stdu r12, -48(r1)
-; CHECK-LE-NEXT:    li r0, 16
-; CHECK-LE-NEXT:    mtctr r0
-; CHECK-LE-NEXT:    lis r0, -1
-; CHECK-LE-NEXT:    nop
+; CHECK-LE-NEXT:    mr r0, r1
+; CHECK-LE-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-LE-NEXT:    stdu r0, -48(r1)
+; CHECK-LE-NEXT:    li r12, 16
+; CHECK-LE-NEXT:    mtctr r12
+; CHECK-LE-NEXT:    lis r12, -1
+; CHECK-LE-NEXT:    ori r12, r12, 0
 ; CHECK-LE-NEXT:  .LBB5_1: # %entry
 ; CHECK-LE-NEXT:    #
-; CHECK-LE-NEXT:    stdux r12, r1, r0
+; CHECK-LE-NEXT:    stdux r0, r1, r12
 ; CHECK-LE-NEXT:    bdnz .LBB5_1
 ; CHECK-LE-NEXT:  # %bb.2: # %entry
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r1
@@ -313,16 +313,16 @@ define i8 @f5() #0 "stack-probe-size"="65536" {
 ;
 ; CHECK-BE-LABEL: f5:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mr r12, r1
-; CHECK-BE-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-BE-NEXT:    stdu r12, -64(r1)
-; CHECK-BE-NEXT:    li r0, 16
-; CHECK-BE-NEXT:    mtctr r0
-; CHECK-BE-NEXT:    lis r0, -1
-; CHECK-BE-NEXT:    nop
+; CHECK-BE-NEXT:    mr r0, r1
+; CHECK-BE-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-BE-NEXT:    stdu r0, -64(r1)
+; CHECK-BE-NEXT:    li r12, 16
+; CHECK-BE-NEXT:    mtctr r12
+; CHECK-BE-NEXT:    lis r12, -1
+; CHECK-BE-NEXT:    ori r12, r12, 0
 ; CHECK-BE-NEXT:  .LBB5_1: # %entry
 ; CHECK-BE-NEXT:    #
-; CHECK-BE-NEXT:    stdux r12, r1, r0
+; CHECK-BE-NEXT:    stdux r0, r1, r12
 ; CHECK-BE-NEXT:    bdnz .LBB5_1
 ; CHECK-BE-NEXT:  # %bb.2: # %entry
 ; CHECK-BE-NEXT:    .cfi_def_cfa_register r1
@@ -335,20 +335,20 @@ define i8 @f5() #0 "stack-probe-size"="65536" {
 ;
 ; CHECK-32-LABEL: f5:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    mr r12, r1
-; CHECK-32-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-32-NEXT:    stwu r12, -16(r1)
-; CHECK-32-NEXT:    li r0, 16
-; CHECK-32-NEXT:    mtctr r0
-; CHECK-32-NEXT:    lis r0, -1
-; CHECK-32-NEXT:    nop
+; CHECK-32-NEXT:    mr r0, r1
+; CHECK-32-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-32-NEXT:    stwu r0, -16(r1)
+; CHECK-32-NEXT:    li r12, 16
+; CHECK-32-NEXT:    mtctr r12
+; CHECK-32-NEXT:    lis r12, -1
+; CHECK-32-NEXT:    ori r12, r12, 0
 ; CHECK-32-NEXT:  .LBB5_1: # %entry
 ; CHECK-32-NEXT:    #
-; CHECK-32-NEXT:    stwux r12, r1, r0
+; CHECK-32-NEXT:    stwux r0, r1, r12
 ; CHECK-32-NEXT:    bdnz .LBB5_1
 ; CHECK-32-NEXT:  # %bb.2: # %entry
 ; CHECK-32-NEXT:    .cfi_def_cfa_register r1
-; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    .cfi_def_cfa_offset 1048592
 ; CHECK-32-NEXT:    li r3, 3
@@ -370,15 +370,15 @@ entry:
 define i8 @f6() #0 {
 ; CHECK-LE-LABEL: f6:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    mr r12, r1
-; CHECK-LE-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-LE-NEXT:    stdu r12, -48(r1)
-; CHECK-LE-NEXT:    lis r0, 4
-; CHECK-LE-NEXT:    nop
-; CHECK-LE-NEXT:    mtctr r0
+; CHECK-LE-NEXT:    mr r0, r1
+; CHECK-LE-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-LE-NEXT:    stdu r0, -48(r1)
+; CHECK-LE-NEXT:    lis r12, 4
+; CHECK-LE-NEXT:    ori r12, r12, 0
+; CHECK-LE-NEXT:    mtctr r12
 ; CHECK-LE-NEXT:  .LBB6_1: # %entry
 ; CHECK-LE-NEXT:    #
-; CHECK-LE-NEXT:    stdu r12, -4096(r1)
+; CHECK-LE-NEXT:    stdu r0, -4096(r1)
 ; CHECK-LE-NEXT:    bdnz .LBB6_1
 ; CHECK-LE-NEXT:  # %bb.2: # %entry
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r1
@@ -391,15 +391,15 @@ define i8 @f6() #0 {
 ;
 ; CHECK-BE-LABEL: f6:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mr r12, r1
-; CHECK-BE-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-BE-NEXT:    stdu r12, -64(r1)
-; CHECK-BE-NEXT:    lis r0, 4
-; CHECK-BE-NEXT:    nop
-; CHECK-BE-NEXT:    mtctr r0
+; CHECK-BE-NEXT:    mr r0, r1
+; CHECK-BE-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-BE-NEXT:    stdu r0, -64(r1)
+; CHECK-BE-NEXT:    lis r12, 4
+; CHECK-BE-NEXT:    ori r12, r12, 0
+; CHECK-BE-NEXT:    mtctr r12
 ; CHECK-BE-NEXT:  .LBB6_1: # %entry
 ; CHECK-BE-NEXT:    #
-; CHECK-BE-NEXT:    stdu r12, -4096(r1)
+; CHECK-BE-NEXT:    stdu r0, -4096(r1)
 ; CHECK-BE-NEXT:    bdnz .LBB6_1
 ; CHECK-BE-NEXT:  # %bb.2: # %entry
 ; CHECK-BE-NEXT:    .cfi_def_cfa_register r1
@@ -412,19 +412,19 @@ define i8 @f6() #0 {
 ;
 ; CHECK-32-LABEL: f6:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    mr r12, r1
-; CHECK-32-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-32-NEXT:    stwu r12, -16(r1)
-; CHECK-32-NEXT:    lis r0, 4
-; CHECK-32-NEXT:    nop
-; CHECK-32-NEXT:    mtctr r0
+; CHECK-32-NEXT:    mr r0, r1
+; CHECK-32-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-32-NEXT:    stwu r0, -16(r1)
+; CHECK-32-NEXT:    lis r12, 4
+; CHECK-32-NEXT:    ori r12, r12, 0
+; CHECK-32-NEXT:    mtctr r12
 ; CHECK-32-NEXT:  .LBB6_1: # %entry
 ; CHECK-32-NEXT:    #
-; CHECK-32-NEXT:    stwu r12, -4096(r1)
+; CHECK-32-NEXT:    stwu r0, -4096(r1)
 ; CHECK-32-NEXT:    bdnz .LBB6_1
 ; CHECK-32-NEXT:  # %bb.2: # %entry
 ; CHECK-32-NEXT:    .cfi_def_cfa_register r1
-; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    .cfi_def_cfa_offset 1073741840
 ; CHECK-32-NEXT:    li r3, 3
@@ -446,18 +446,18 @@ entry:
 define i8 @f7() #0 "stack-probe-size"="65536" {
 ; CHECK-LE-LABEL: f7:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    mr r12, r1
-; CHECK-LE-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-LE-NEXT:    lis r0, -1
-; CHECK-LE-NEXT:    ori r0, r0, 13776
-; CHECK-LE-NEXT:    stdux r12, r1, r0
-; CHECK-LE-NEXT:    li r0, 15258
-; CHECK-LE-NEXT:    mtctr r0
-; CHECK-LE-NEXT:    lis r0, -1
-; CHECK-LE-NEXT:    nop
+; CHECK-LE-NEXT:    mr r0, r1
+; CHECK-LE-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-LE-NEXT:    lis r12, -1
+; CHECK-LE-NEXT:    ori r12, r12, 13776
+; CHECK-LE-NEXT:    stdux r0, r1, r12
+; CHECK-LE-NEXT:    li r12, 15258
+; CHECK-LE-NEXT:    mtctr r12
+; CHECK-LE-NEXT:    lis r12, -1
+; CHECK-LE-NEXT:    ori r12, r12, 0
 ; CHECK-LE-NEXT:  .LBB7_1: # %entry
 ; CHECK-LE-NEXT:    #
-; CHECK-LE-NEXT:    stdux r12, r1, r0
+; CHECK-LE-NEXT:    stdux r0, r1, r12
 ; CHECK-LE-NEXT:    bdnz .LBB7_1
 ; CHECK-LE-NEXT:  # %bb.2: # %entry
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r1
@@ -470,18 +470,18 @@ define i8 @f7() #0 "stack-probe-size"="65536" {
 ;
 ; CHECK-BE-LABEL: f7:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    mr r12, r1
-; CHECK-BE-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-BE-NEXT:    lis r0, -1
-; CHECK-BE-NEXT:    ori r0, r0, 13760
-; CHECK-BE-NEXT:    stdux r12, r1, r0
-; CHECK-BE-NEXT:    li r0, 15258
-; CHECK-BE-NEXT:    mtctr r0
-; CHECK-BE-NEXT:    lis r0, -1
-; CHECK-BE-NEXT:    nop
+; CHECK-BE-NEXT:    mr r0, r1
+; CHECK-BE-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-BE-NEXT:    lis r12, -1
+; CHECK-BE-NEXT:    ori r12, r12, 13760
+; CHECK-BE-NEXT:    stdux r0, r1, r12
+; CHECK-BE-NEXT:    li r12, 15258
+; CHECK-BE-NEXT:    mtctr r12
+; CHECK-BE-NEXT:    lis r12, -1
+; CHECK-BE-NEXT:    ori r12, r12, 0
 ; CHECK-BE-NEXT:  .LBB7_1: # %entry
 ; CHECK-BE-NEXT:    #
-; CHECK-BE-NEXT:    stdux r12, r1, r0
+; CHECK-BE-NEXT:    stdux r0, r1, r12
 ; CHECK-BE-NEXT:    bdnz .LBB7_1
 ; CHECK-BE-NEXT:  # %bb.2: # %entry
 ; CHECK-BE-NEXT:    .cfi_def_cfa_register r1
@@ -494,22 +494,22 @@ define i8 @f7() #0 "stack-probe-size"="65536" {
 ;
 ; CHECK-32-LABEL: f7:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    mr r12, r1
-; CHECK-32-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-32-NEXT:    lis r0, -1
-; CHECK-32-NEXT:    ori r0, r0, 13808
-; CHECK-32-NEXT:    stwux r12, r1, r0
-; CHECK-32-NEXT:    li r0, 15258
-; CHECK-32-NEXT:    mtctr r0
-; CHECK-32-NEXT:    lis r0, -1
-; CHECK-32-NEXT:    nop
+; CHECK-32-NEXT:    mr r0, r1
+; CHECK-32-NEXT:    .cfi_def_cfa r0, 0
+; CHECK-32-NEXT:    lis r12, -1
+; CHECK-32-NEXT:    ori r12, r12, 13808
+; CHECK-32-NEXT:    stwux r0, r1, r12
+; CHECK-32-NEXT:    li r12, 15258
+; CHECK-32-NEXT:    mtctr r12
+; CHECK-32-NEXT:    lis r12, -1
+; CHECK-32-NEXT:    ori r12, r12, 0
 ; CHECK-32-NEXT:  .LBB7_1: # %entry
 ; CHECK-32-NEXT:    #
-; CHECK-32-NEXT:    stwux r12, r1, r0
+; CHECK-32-NEXT:    stwux r0, r1, r12
 ; CHECK-32-NEXT:    bdnz .LBB7_1
 ; CHECK-32-NEXT:  # %bb.2: # %entry
 ; CHECK-32-NEXT:    .cfi_def_cfa_register r1
-; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    .cfi_def_cfa_offset 1000000016
 ; CHECK-32-NEXT:    li r3, 3
@@ -599,31 +599,24 @@ define i32 @f8(i64 %i) local_unnamed_addr #0 {
 define i32 @f9(i64 %i) local_unnamed_addr #0 {
 ; CHECK-LE-LABEL: f9:
 ; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    clrldi r12, r1, 53
 ; CHECK-LE-NEXT:    std r30, -16(r1)
 ; CHECK-LE-NEXT:    mr r30, r1
-; CHECK-LE-NEXT:    .cfi_def_cfa r30, 0
-; CHECK-LE-NEXT:    clrldi r0, r30, 53
-; CHECK-LE-NEXT:    subc r12, r30, r0
-; CHECK-LE-NEXT:    clrldi r0, r0, 52
-; CHECK-LE-NEXT:    cmpdi r0, 0
-; CHECK-LE-NEXT:    beq cr0, .LBB9_2
-; CHECK-LE-NEXT:  # %bb.1:
-; CHECK-LE-NEXT:    neg r0, r0
-; CHECK-LE-NEXT:    stdux r30, r1, r0
+; CHECK-LE-NEXT:    sub r0, r1, r12
+; CHECK-LE-NEXT:    li r12, -10240
+; CHECK-LE-NEXT:    add r0, r12, r0
+; CHECK-LE-NEXT:    sub r12, r0, r1
+; CHECK-LE-NEXT:    cmpdi r12, -4096
+; CHECK-LE-NEXT:    bge cr0, .LBB9_2
+; CHECK-LE-NEXT:  .LBB9_1:
+; CHECK-LE-NEXT:    stdu r30, -4096(r1)
+; CHECK-LE-NEXT:    addi r12, r12, 4096
+; CHECK-LE-NEXT:    cmpdi r12, -4096
+; CHECK-LE-NEXT:    blt cr0, .LBB9_1
 ; CHECK-LE-NEXT:  .LBB9_2:
-; CHECK-LE-NEXT:    li r0, -4096
-; CHECK-LE-NEXT:    cmpd r1, r12
-; CHECK-LE-NEXT:    beq cr0, .LBB9_4
-; CHECK-LE-NEXT:  .LBB9_3:
-; CHECK-LE-NEXT:    stdux r30, r1, r0
-; CHECK-LE-NEXT:    cmpd r1, r12
-; CHECK-LE-NEXT:    bne cr0, .LBB9_3
-; CHECK-LE-NEXT:  .LBB9_4:
-; CHECK-LE-NEXT:    mr r12, r30
-; CHECK-LE-NEXT:    stdu r12, -2048(r1)
-; CHECK-LE-NEXT:    stdu r12, -4096(r1)
-; CHECK-LE-NEXT:    stdu r12, -4096(r1)
-; CHECK-LE-NEXT:    .cfi_def_cfa_register r1
+; CHECK-LE-NEXT:    stdux r30, r1, r12
+; CHECK-LE-NEXT:    mr r0, r30
+; CHECK-LE-NEXT:    .cfi_def_cfa_register r0
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r30
 ; CHECK-LE-NEXT:    .cfi_offset r30, -16
 ; CHECK-LE-NEXT:    addi r4, r1, 2048
@@ -637,31 +630,24 @@ define i32 @f9(i64 %i) local_unnamed_addr #0 {
 ;
 ; CHECK-BE-LABEL: f9:
 ; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    clrldi r12, r1, 53
 ; CHECK-BE-NEXT:    std r30, -16(r1)
 ; CHECK-BE-NEXT:    mr r30, r1
-; CHECK-BE-NEXT:    .cfi_def_cfa r30, 0
-; CHECK-BE-NEXT:    clrldi r0, r30, 53
-; CHECK-BE-NEXT:    subc r12, r30, r0
-; CHECK-BE-NEXT:    clrldi r0, r0, 52
-; CHECK-BE-NEXT:    cmpdi r0, 0
-; CHECK-BE-NEXT:    beq cr0, .LBB9_2
-; CHECK-BE-NEXT:  # %bb.1:
-; CHECK-BE-NEXT:    neg r0, r0
-; CHECK-BE-NEXT:    stdux r30, r1, r0
+; CHECK-BE-NEXT:    sub r0, r1, r12
+; CHECK-BE-NEXT:    li r12, -10240
+; CHECK-BE-NEXT:    add r0, r12, r0
+; CHECK-BE-NEXT:    sub r12, r0, r1
+; CHECK-BE-NEXT:    cmpdi r12, -4096
+; CHECK-BE-NEXT:    bge cr0, .LBB9_2
+; CHECK-BE-NEXT:  .LBB9_1:
+; CHECK-BE-NEXT:    stdu r30, -4096(r1)
+; CHECK-BE-NEXT:    addi r12, r12, 4096
+; CHECK-BE-NEXT:    cmpdi r12, -4096
+; CHECK-BE-NEXT:    blt cr0, .LBB9_1
 ; CHECK-BE-NEXT:  .LBB9_2:
-; CHECK-BE-NEXT:    li r0, -4096
-; CHECK-BE-NEXT:    cmpd r1, r12
-; CHECK-BE-NEXT:    beq cr0, .LBB9_4
-; CHECK-BE-NEXT:  .LBB9_3:
-; CHECK-BE-NEXT:    stdux r30, r1, r0
-; CHECK-BE-NEXT:    cmpd r1, r12
-; CHECK-BE-NEXT:    bne cr0, .LBB9_3
-; CHECK-BE-NEXT:  .LBB9_4:
-; CHECK-BE-NEXT:    mr r12, r30
-; CHECK-BE-NEXT:    stdu r12, -2048(r1)
-; CHECK-BE-NEXT:    stdu r12, -4096(r1)
-; CHECK-BE-NEXT:    stdu r12, -4096(r1)
-; CHECK-BE-NEXT:    .cfi_def_cfa_register r1
+; CHECK-BE-NEXT:    stdux r30, r1, r12
+; CHECK-BE-NEXT:    mr r0, r30
+; CHECK-BE-NEXT:    .cfi_def_cfa_register r0
 ; CHECK-BE-NEXT:    .cfi_def_cfa_register r30
 ; CHECK-BE-NEXT:    .cfi_offset r30, -16
 ; CHECK-BE-NEXT:    addi r4, r1, 2048
@@ -675,15 +661,23 @@ define i32 @f9(i64 %i) local_unnamed_addr #0 {
 ;
 ; CHECK-32-LABEL: f9:
 ; CHECK-32:       # %bb.0:
-; CHECK-32-NEXT:    mr r12, r1
-; CHECK-32-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-32-NEXT:    clrlwi r0, r12, 21
-; CHECK-32-NEXT:    subc r1, r1, r0
-; CHECK-32-NEXT:    stwu r12, -2048(r1)
-; CHECK-32-NEXT:    stwu r12, -4096(r1)
-; CHECK-32-NEXT:    stwu r12, -4096(r1)
-; CHECK-32-NEXT:    .cfi_def_cfa_register r1
+; CHECK-32-NEXT:    clrlwi r12, r1, 21
 ; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    li r12, -10240
+; CHECK-32-NEXT:    add r0, r12, r0
+; CHECK-32-NEXT:    sub r12, r0, r1
+; CHECK-32-NEXT:    mr r0, r1
+; CHECK-32-NEXT:    cmpwi r12, -4096
+; CHECK-32-NEXT:    bge cr0, .LBB9_2
+; CHECK-32-NEXT:  .LBB9_1:
+; CHECK-32-NEXT:    stwu r0, -4096(r1)
+; CHECK-32-NEXT:    addi r12, r12, 4096
+; CHECK-32-NEXT:    cmpwi r12, -4096
+; CHECK-32-NEXT:    blt cr0, .LBB9_1
+; CHECK-32-NEXT:  .LBB9_2:
+; CHECK-32-NEXT:    stwux r0, r1, r12
+; CHECK-32-NEXT:    .cfi_def_cfa_register r0
+; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    addic r0, r0, -8
 ; CHECK-32-NEXT:    stwx r30, 0, r0
@@ -712,30 +706,24 @@ define i32 @f9(i64 %i) local_unnamed_addr #0 {
 define i32 @f10(i64 %i) local_unnamed_addr #0 {
 ; CHECK-LE-LABEL: f10:
 ; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    clrldi r12, r1, 54
 ; CHECK-LE-NEXT:    std r30, -16(r1)
 ; CHECK-LE-NEXT:    mr r30, r1
-; CHECK-LE-NEXT:    .cfi_def_cfa r30, 0
-; CHECK-LE-NEXT:    clrldi r0, r30, 54
-; CHECK-LE-NEXT:    subc r12, r30, r0
-; CHECK-LE-NEXT:    clrldi r0, r0, 52
-; CHECK-LE-NEXT:    cmpdi r0, 0
-; CHECK-LE-NEXT:    beq cr0, .LBB10_2
-; CHECK-LE-NEXT:  # %bb.1:
-; CHECK-LE-NEXT:    neg r0, r0
-; CHECK-LE-NEXT:    stdux r30, r1, r0
+; CHECK-LE-NEXT:    sub r0, r1, r12
+; CHECK-LE-NEXT:    li r12, -5120
+; CHECK-LE-NEXT:    add r0, r12, r0
+; CHECK-LE-NEXT:    sub r12, r0, r1
+; CHECK-LE-NEXT:    cmpdi r12, -4096
+; CHECK-LE-NEXT:    bge cr0, .LBB10_2
+; CHECK-LE-NEXT:  .LBB10_1:
+; CHECK-LE-NEXT:    stdu r30, -4096(r1)
+; CHECK-LE-NEXT:    addi r12, r12, 4096
+; CHECK-LE-NEXT:    cmpdi r12, -4096
+; CHECK-LE-NEXT:    blt cr0, .LBB10_1
 ; CHECK-LE-NEXT:  .LBB10_2:
-; CHECK-LE-NEXT:    li r0, -4096
-; CHECK-LE-NEXT:    cmpd r1, r12
-; CHECK-LE-NEXT:    beq cr0, .LBB10_4
-; CHECK-LE-NEXT:  .LBB10_3:
-; CHECK-LE-NEXT:    stdux r30, r1, r0
-; CHECK-LE-NEXT:    cmpd r1, r12
-; CHECK-LE-NEXT:    bne cr0, .LBB10_3
-; CHECK-LE-NEXT:  .LBB10_4:
-; CHECK-LE-NEXT:    mr r12, r30
-; CHECK-LE-NEXT:    stdu r12, -1024(r1)
-; CHECK-LE-NEXT:    stdu r12, -4096(r1)
-; CHECK-LE-NEXT:    .cfi_def_cfa_register r1
+; CHECK-LE-NEXT:    stdux r30, r1, r12
+; CHECK-LE-NEXT:    mr r0, r30
+; CHECK-LE-NEXT:    .cfi_def_cfa_register r0
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r30
 ; CHECK-LE-NEXT:    .cfi_offset r30, -16
 ; CHECK-LE-NEXT:    addi r4, r1, 1024
@@ -749,30 +737,24 @@ define i32 @f10(i64 %i) local_unnamed_addr #0 {
 ;
 ; CHECK-BE-LABEL: f10:
 ; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    clrldi r12, r1, 54
 ; CHECK-BE-NEXT:    std r30, -16(r1)
 ; CHECK-BE-NEXT:    mr r30, r1
-; CHECK-BE-NEXT:    .cfi_def_cfa r30, 0
-; CHECK-BE-NEXT:    clrldi r0, r30, 54
-; CHECK-BE-NEXT:    subc r12, r30, r0
-; CHECK-BE-NEXT:    clrldi r0, r0, 52
-; CHECK-BE-NEXT:    cmpdi r0, 0
-; CHECK-BE-NEXT:    beq cr0, .LBB10_2
-; CHECK-BE-NEXT:  # %bb.1:
-; CHECK-BE-NEXT:    neg r0, r0
-; CHECK-BE-NEXT:    stdux r30, r1, r0
+; CHECK-BE-NEXT:    sub r0, r1, r12
+; CHECK-BE-NEXT:    li r12, -5120
+; CHECK-BE-NEXT:    add r0, r12, r0
+; CHECK-BE-NEXT:    sub r12, r0, r1
+; CHECK-BE-NEXT:    cmpdi r12, -4096
+; CHECK-BE-NEXT:    bge cr0, .LBB10_2
+; CHECK-BE-NEXT:  .LBB10_1:
+; CHECK-BE-NEXT:    stdu r30, -4096(r1)
+; CHECK-BE-NEXT:    addi r12, r12, 4096
+; CHECK-BE-NEXT:    cmpdi r12, -4096
+; CHECK-BE-NEXT:    blt cr0, .LBB10_1
 ; CHECK-BE-NEXT:  .LBB10_2:
-; CHECK-BE-NEXT:    li r0, -4096
-; CHECK-BE-NEXT:    cmpd r1, r12
-; CHECK-BE-NEXT:    beq cr0, .LBB10_4
-; CHECK-BE-NEXT:  .LBB10_3:
-; CHECK-BE-NEXT:    stdux r30, r1, r0
-; CHECK-BE-NEXT:    cmpd r1, r12
-; CHECK-BE-NEXT:    bne cr0, .LBB10_3
-; CHECK-BE-NEXT:  .LBB10_4:
-; CHECK-BE-NEXT:    mr r12, r30
-; CHECK-BE-NEXT:    stdu r12, -1024(r1)
-; CHECK-BE-NEXT:    stdu r12, -4096(r1)
-; CHECK-BE-NEXT:    .cfi_def_cfa_register r1
+; CHECK-BE-NEXT:    stdux r30, r1, r12
+; CHECK-BE-NEXT:    mr r0, r30
+; CHECK-BE-NEXT:    .cfi_def_cfa_register r0
 ; CHECK-BE-NEXT:    .cfi_def_cfa_register r30
 ; CHECK-BE-NEXT:    .cfi_offset r30, -16
 ; CHECK-BE-NEXT:    addi r4, r1, 1024
@@ -786,14 +768,23 @@ define i32 @f10(i64 %i) local_unnamed_addr #0 {
 ;
 ; CHECK-32-LABEL: f10:
 ; CHECK-32:       # %bb.0:
-; CHECK-32-NEXT:    mr r12, r1
-; CHECK-32-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-32-NEXT:    clrlwi r0, r12, 22
-; CHECK-32-NEXT:    subc r1, r1, r0
-; CHECK-32-NEXT:    stwu r12, -1024(r1)
-; CHECK-32-NEXT:    stwu r12, -4096(r1)
-; CHECK-32-NEXT:    .cfi_def_cfa_register r1
+; CHECK-32-NEXT:    clrlwi r12, r1, 22
 ; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    li r12, -5120
+; CHECK-32-NEXT:    add r0, r12, r0
+; CHECK-32-NEXT:    sub r12, r0, r1
+; CHECK-32-NEXT:    mr r0, r1
+; CHECK-32-NEXT:    cmpwi r12, -4096
+; CHECK-32-NEXT:    bge cr0, .LBB10_2
+; CHECK-32-NEXT:  .LBB10_1:
+; CHECK-32-NEXT:    stwu r0, -4096(r1)
+; CHECK-32-NEXT:    addi r12, r12, 4096
+; CHECK-32-NEXT:    cmpwi r12, -4096
+; CHECK-32-NEXT:    blt cr0, .LBB10_1
+; CHECK-32-NEXT:  .LBB10_2:
+; CHECK-32-NEXT:    stwux r0, r1, r12
+; CHECK-32-NEXT:    .cfi_def_cfa_register r0
+; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    addic r0, r0, -8
 ; CHECK-32-NEXT:    stwx r30, 0, r0
@@ -821,35 +812,26 @@ define i32 @f10(i64 %i) local_unnamed_addr #0 {
 define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-LE-LABEL: f11:
 ; CHECK-LE:       # %bb.0:
+; CHECK-LE-NEXT:    clrldi r12, r1, 49
 ; CHECK-LE-NEXT:    std r31, -8(r1)
 ; CHECK-LE-NEXT:    std r30, -16(r1)
 ; CHECK-LE-NEXT:    mr r30, r1
-; CHECK-LE-NEXT:    .cfi_def_cfa r30, 0
-; CHECK-LE-NEXT:    clrldi r0, r30, 49
-; CHECK-LE-NEXT:    subc r12, r30, r0
-; CHECK-LE-NEXT:    clrldi r0, r0, 52
-; CHECK-LE-NEXT:    cmpdi r0, 0
-; CHECK-LE-NEXT:    beq cr0, .LBB11_2
-; CHECK-LE-NEXT:  # %bb.1:
-; CHECK-LE-NEXT:    neg r0, r0
-; CHECK-LE-NEXT:    stdux r30, r1, r0
+; CHECK-LE-NEXT:    sub r0, r1, r12
+; CHECK-LE-NEXT:    lis r12, -2
+; CHECK-LE-NEXT:    ori r12, r12, 32768
+; CHECK-LE-NEXT:    add r0, r12, r0
+; CHECK-LE-NEXT:    sub r12, r0, r1
+; CHECK-LE-NEXT:    cmpdi r12, -4096
+; CHECK-LE-NEXT:    bge cr0, .LBB11_2
+; CHECK-LE-NEXT:  .LBB11_1:
+; CHECK-LE-NEXT:    stdu r30, -4096(r1)
+; CHECK-LE-NEXT:    addi r12, r12, 4096
+; CHECK-LE-NEXT:    cmpdi r12, -4096
+; CHECK-LE-NEXT:    blt cr0, .LBB11_1
 ; CHECK-LE-NEXT:  .LBB11_2:
-; CHECK-LE-NEXT:    li r0, -4096
-; CHECK-LE-NEXT:    cmpd r1, r12
-; CHECK-LE-NEXT:    beq cr0, .LBB11_4
-; CHECK-LE-NEXT:  .LBB11_3:
-; CHECK-LE-NEXT:    stdux r30, r1, r0
-; CHECK-LE-NEXT:    cmpd r1, r12
-; CHECK-LE-NEXT:    bne cr0, .LBB11_3
-; CHECK-LE-NEXT:  .LBB11_4:
-; CHECK-LE-NEXT:    mr r12, r30
-; CHECK-LE-NEXT:    li r0, 24
-; CHECK-LE-NEXT:    mtctr r0
-; CHECK-LE-NEXT:  .LBB11_5:
-; CHECK-LE-NEXT:    stdu r12, -4096(r1)
-; CHECK-LE-NEXT:    bdnz .LBB11_5
-; CHECK-LE-NEXT:  # %bb.6:
-; CHECK-LE-NEXT:    .cfi_def_cfa_register r1
+; CHECK-LE-NEXT:    stdux r30, r1, r12
+; CHECK-LE-NEXT:    mr r0, r30
+; CHECK-LE-NEXT:    .cfi_def_cfa_register r0
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r30
 ; CHECK-LE-NEXT:    .cfi_offset r31, -8
 ; CHECK-LE-NEXT:    .cfi_offset r30, -16
@@ -876,12 +858,12 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-LE-NEXT:    add r4, r1, r7
 ; CHECK-LE-NEXT:    stdux r3, r1, r5
 ; CHECK-LE-NEXT:    cmpd r1, r4
-; CHECK-LE-NEXT:    beq cr0, .LBB11_8
-; CHECK-LE-NEXT:  .LBB11_7:
+; CHECK-LE-NEXT:    beq cr0, .LBB11_4
+; CHECK-LE-NEXT:  .LBB11_3:
 ; CHECK-LE-NEXT:    stdu r3, -4096(r1)
 ; CHECK-LE-NEXT:    cmpd r1, r4
-; CHECK-LE-NEXT:    bne cr0, .LBB11_7
-; CHECK-LE-NEXT:  .LBB11_8:
+; CHECK-LE-NEXT:    bne cr0, .LBB11_3
+; CHECK-LE-NEXT:  .LBB11_4:
 ; CHECK-LE-NEXT:    addi r3, r1, -32768
 ; CHECK-LE-NEXT:    lbz r3, 0(r3)
 ; CHECK-LE-NEXT:    mr r1, r30
@@ -891,35 +873,26 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ;
 ; CHECK-BE-LABEL: f11:
 ; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    clrldi r12, r1, 49
 ; CHECK-BE-NEXT:    std r31, -8(r1)
 ; CHECK-BE-NEXT:    std r30, -16(r1)
 ; CHECK-BE-NEXT:    mr r30, r1
-; CHECK-BE-NEXT:    .cfi_def_cfa r30, 0
-; CHECK-BE-NEXT:    clrldi r0, r30, 49
-; CHECK-BE-NEXT:    subc r12, r30, r0
-; CHECK-BE-NEXT:    clrldi r0, r0, 52
-; CHECK-BE-NEXT:    cmpdi r0, 0
-; CHECK-BE-NEXT:    beq cr0, .LBB11_2
-; CHECK-BE-NEXT:  # %bb.1:
-; CHECK-BE-NEXT:    neg r0, r0
-; CHECK-BE-NEXT:    stdux r30, r1, r0
+; CHECK-BE-NEXT:    sub r0, r1, r12
+; CHECK-BE-NEXT:    lis r12, -2
+; CHECK-BE-NEXT:    ori r12, r12, 32768
+; CHECK-BE-NEXT:    add r0, r12, r0
+; CHECK-BE-NEXT:    sub r12, r0, r1
+; CHECK-BE-NEXT:    cmpdi r12, -4096
+; CHECK-BE-NEXT:    bge cr0, .LBB11_2
+; CHECK-BE-NEXT:  .LBB11_1:
+; CHECK-BE-NEXT:    stdu r30, -4096(r1)
+; CHECK-BE-NEXT:    addi r12, r12, 4096
+; CHECK-BE-NEXT:    cmpdi r12, -4096
+; CHECK-BE-NEXT:    blt cr0, .LBB11_1
 ; CHECK-BE-NEXT:  .LBB11_2:
-; CHECK-BE-NEXT:    li r0, -4096
-; CHECK-BE-NEXT:    cmpd r1, r12
-; CHECK-BE-NEXT:    beq cr0, .LBB11_4
-; CHECK-BE-NEXT:  .LBB11_3:
-; CHECK-BE-NEXT:    stdux r30, r1, r0
-; CHECK-BE-NEXT:    cmpd r1, r12
-; CHECK-BE-NEXT:    bne cr0, .LBB11_3
-; CHECK-BE-NEXT:  .LBB11_4:
-; CHECK-BE-NEXT:    mr r12, r30
-; CHECK-BE-NEXT:    li r0, 24
-; CHECK-BE-NEXT:    mtctr r0
-; CHECK-BE-NEXT:  .LBB11_5:
-; CHECK-BE-NEXT:    stdu r12, -4096(r1)
-; CHECK-BE-NEXT:    bdnz .LBB11_5
-; CHECK-BE-NEXT:  # %bb.6:
-; CHECK-BE-NEXT:    .cfi_def_cfa_register r1
+; CHECK-BE-NEXT:    stdux r30, r1, r12
+; CHECK-BE-NEXT:    mr r0, r30
+; CHECK-BE-NEXT:    .cfi_def_cfa_register r0
 ; CHECK-BE-NEXT:    .cfi_def_cfa_register r30
 ; CHECK-BE-NEXT:    .cfi_offset r31, -8
 ; CHECK-BE-NEXT:    .cfi_offset r30, -16
@@ -946,12 +919,12 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-BE-NEXT:    add r4, r1, r7
 ; CHECK-BE-NEXT:    stdux r3, r1, r5
 ; CHECK-BE-NEXT:    cmpd r1, r4
-; CHECK-BE-NEXT:    beq cr0, .LBB11_8
-; CHECK-BE-NEXT:  .LBB11_7:
+; CHECK-BE-NEXT:    beq cr0, .LBB11_4
+; CHECK-BE-NEXT:  .LBB11_3:
 ; CHECK-BE-NEXT:    stdu r3, -4096(r1)
 ; CHECK-BE-NEXT:    cmpd r1, r4
-; CHECK-BE-NEXT:    bne cr0, .LBB11_7
-; CHECK-BE-NEXT:  .LBB11_8:
+; CHECK-BE-NEXT:    bne cr0, .LBB11_3
+; CHECK-BE-NEXT:  .LBB11_4:
 ; CHECK-BE-NEXT:    addi r3, r1, -32768
 ; CHECK-BE-NEXT:    lbz r3, 0(r3)
 ; CHECK-BE-NEXT:    mr r1, r30
@@ -961,18 +934,24 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ;
 ; CHECK-32-LABEL: f11:
 ; CHECK-32:       # %bb.0:
-; CHECK-32-NEXT:    mr r12, r1
-; CHECK-32-NEXT:    .cfi_def_cfa r12, 0
-; CHECK-32-NEXT:    clrlwi r0, r12, 17
-; CHECK-32-NEXT:    subc r1, r1, r0
-; CHECK-32-NEXT:    li r0, 24
-; CHECK-32-NEXT:    mtctr r0
-; CHECK-32-NEXT:  .LBB11_1:
-; CHECK-32-NEXT:    stwu r12, -4096(r1)
-; CHECK-32-NEXT:    bdnz .LBB11_1
-; CHECK-32-NEXT:  # %bb.2:
-; CHECK-32-NEXT:    .cfi_def_cfa_register r1
+; CHECK-32-NEXT:    clrlwi r12, r1, 17
 ; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    lis r12, -2
+; CHECK-32-NEXT:    ori r12, r12, 32768
+; CHECK-32-NEXT:    add r0, r12, r0
+; CHECK-32-NEXT:    sub r12, r0, r1
+; CHECK-32-NEXT:    mr r0, r1
+; CHECK-32-NEXT:    cmpwi r12, -4096
+; CHECK-32-NEXT:    bge cr0, .LBB11_2
+; CHECK-32-NEXT:  .LBB11_1:
+; CHECK-32-NEXT:    stwu r0, -4096(r1)
+; CHECK-32-NEXT:    addi r12, r12, 4096
+; CHECK-32-NEXT:    cmpwi r12, -4096
+; CHECK-32-NEXT:    blt cr0, .LBB11_1
+; CHECK-32-NEXT:  .LBB11_2:
+; CHECK-32-NEXT:    stwux r0, r1, r12
+; CHECK-32-NEXT:    .cfi_def_cfa_register r0
+; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    sub r0, r1, r0
 ; CHECK-32-NEXT:    addic r0, r0, -4
 ; CHECK-32-NEXT:    stwx r31, 0, r0


        


More information about the llvm-branch-commits mailing list