[llvm] b604fcb - [runtime] Move prolog/epilog block to a post-simplify strategy

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 31 09:31:10 PDT 2021


Author: Philip Reames
Date: 2021-08-31T09:29:36-07:00
New Revision: b604fcb7bc9c4c3bf15479c8406eb69f582fb71b

URL: https://github.com/llvm/llvm-project/commit/b604fcb7bc9c4c3bf15479c8406eb69f582fb71b
DIFF: https://github.com/llvm/llvm-project/commit/b604fcb7bc9c4c3bf15479c8406eb69f582fb71b.diff

LOG: [runtime] Move prolog/epilog block to a post-simplify strategy

The runtime unroller will try to produce a non-loop if the unroll count is 2, since the prolog/epilog loop would then run at most one iteration. The old implementation did this by avoiding loop construction entirely. This patch instead constructs the trivial loop and then explicitly breaks the backedge and simplifies. This does result in some additional code churn when triggered, but it a) results in better quality code and b) removes a codepath which didn't work properly for multiple-exit epilogs.

One oddity that I want to draw reviewers' attention to is that this somehow changes the revisit order. The new order looks equivalent to me, but I don't understand how creating and erasing an extra loop here creates this effect.

Differential Revision: https://reviews.llvm.org/D108521

Added: 
    

Modified: 
    llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
    llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
    llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
    llvm/test/Transforms/LoopUnroll/revisit.ll
    llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
    llvm/test/Transforms/LoopUnroll/runtime-loop1.ll
    llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index bd6d1e7eb982d..86aa43c99ede4 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -22,6 +22,7 @@
 
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/IR/BasicBlock.h"
@@ -35,6 +36,7 @@
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
@@ -299,17 +301,15 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
                          PreserveLCSSA);
 }
 
-/// Create a clone of the blocks in a loop and connect them together.
-/// If CreateRemainderLoop is false, loop structure will not be cloned,
-/// otherwise a new loop will be created including all cloned blocks, and the
-/// iterator of it switches to count NewIter down to 0.
+/// Create a clone of the blocks in a loop and connect them together. A new
+/// loop will be created including all cloned blocks, and the iterator of the
+/// new loop switched to count NewIter down to 0.
 /// The cloned blocks should be inserted between InsertTop and InsertBot.
-/// If loop structure is cloned InsertTop should be new preheader, InsertBot
-/// new loop exit.
-/// Return the new cloned loop that is created when CreateRemainderLoop is true.
+/// InsertTop should be new preheader, InsertBot new loop exit.
+/// Returns the new cloned loop that is created.
 static Loop *
-CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
-                const bool UseEpilogRemainder, const bool UnrollRemainder,
+CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
+                const bool UnrollRemainder,
                 BasicBlock *InsertTop,
                 BasicBlock *InsertBot, BasicBlock *Preheader,
                 std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
@@ -323,8 +323,6 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
   Loop *ParentLoop = L->getParentLoop();
   NewLoopsMap NewLoops;
   NewLoops[ParentLoop] = ParentLoop;
-  if (!CreateRemainderLoop)
-    NewLoops[L] = ParentLoop;
 
   // For each block in the original loop, create a new copy,
   // and update the value map with the newly created values.
@@ -332,11 +330,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
     BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
     NewBlocks.push_back(NewBB);
 
-    // If we're unrolling the outermost loop, there's no remainder loop,
-    // and this block isn't in a nested loop, then the new block is not
-    // in any loop. Otherwise, add it to loopinfo.
-    if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
-      addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
+    addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
 
     VMap[*BB] = NewBB;
     if (Header == *BB) {
@@ -357,27 +351,22 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
     }
 
     if (Latch == *BB) {
-      // For the last block, if CreateRemainderLoop is false, create a direct
-      // jump to InsertBot. If not, create a loop back to cloned head.
+      // For the last block, create a loop back to cloned head.
       VMap.erase((*BB)->getTerminator());
       BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
       BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
       IRBuilder<> Builder(LatchBR);
-      if (!CreateRemainderLoop) {
-        Builder.CreateBr(InsertBot);
-      } else {
-        PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
-                                          suffix + ".iter",
-                                          FirstLoopBB->getFirstNonPHI());
-        Value *IdxSub =
-            Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
-                              NewIdx->getName() + ".sub");
-        Value *IdxCmp =
-            Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
-        Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
-        NewIdx->addIncoming(NewIter, InsertTop);
-        NewIdx->addIncoming(IdxSub, NewBB);
-      }
+      PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
+                                        suffix + ".iter",
+                                        FirstLoopBB->getFirstNonPHI());
+      Value *IdxSub =
+        Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
+                          NewIdx->getName() + ".sub");
+      Value *IdxCmp =
+        Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
+      Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
+      NewIdx->addIncoming(NewIter, InsertTop);
+      NewIdx->addIncoming(IdxSub, NewBB);
       LatchBR->eraseFromParent();
     }
   }
@@ -386,28 +375,15 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
   // cloned loop.
   for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
     PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
-    if (!CreateRemainderLoop) {
-      if (UseEpilogRemainder) {
-        unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
-        NewPHI->setIncomingBlock(idx, InsertTop);
-        NewPHI->removeIncomingValue(Latch, false);
-      } else {
-        VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
-        cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
-      }
-    } else {
-      unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
-      NewPHI->setIncomingBlock(idx, InsertTop);
-      BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
-      idx = NewPHI->getBasicBlockIndex(Latch);
-      Value *InVal = NewPHI->getIncomingValue(idx);
-      NewPHI->setIncomingBlock(idx, NewLatch);
-      if (Value *V = VMap.lookup(InVal))
-        NewPHI->setIncomingValue(idx, V);
-    }
+    unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
+    NewPHI->setIncomingBlock(idx, InsertTop);
+    BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
+    idx = NewPHI->getBasicBlockIndex(Latch);
+    Value *InVal = NewPHI->getIncomingValue(idx);
+    NewPHI->setIncomingBlock(idx, NewLatch);
+    if (Value *V = VMap.lookup(InVal))
+      NewPHI->setIncomingValue(idx, V);
   }
-  if (!CreateRemainderLoop)
-    return nullptr;
 
   Loop *NewLoop = NewLoops[L];
   assert(NewLoop && "L should have been cloned");
@@ -819,18 +795,13 @@ bool llvm::UnrollRuntimeLoopRemainder(
   std::vector<BasicBlock *> NewBlocks;
   ValueToValueMapTy VMap;
 
-  // For unroll factor 2 remainder loop will have 1 iterations.
-  // Do not create 1 iteration loop.
-  bool CreateRemainderLoop = (Count != 2);
-
   // Clone all the basic blocks in the loop. If Count is 2, we don't clone
   // the loop, otherwise we create a cloned loop to execute the extra
   // iterations. This function adds the appropriate CFG connections.
   BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
   BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
   Loop *remainderLoop = CloneLoopBlocks(
-      L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
-      InsertTop, InsertBot,
+      L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
       NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
 
   // Assign the maximum possible trip count as the back edge weight for the
@@ -974,6 +945,42 @@ bool llvm::UnrollRuntimeLoopRemainder(
     assert(DT->verify(DominatorTree::VerificationLevel::Full));
 #endif
 
+  // For unroll factor 2 remainder loop will have 1 iteration.
+  if (Count == 2 && DT && LI && SE) {
+    // TODO: This code could probably be pulled out into a helper function
+    // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion.
+    BasicBlock *RemainderLatch = remainderLoop->getLoopLatch();
+    assert(RemainderLatch);
+    SmallVector<BasicBlock*> RemainderBlocks(remainderLoop->getBlocks().begin(),
+                                             remainderLoop->getBlocks().end());
+    breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr);
+    remainderLoop = nullptr;
+
+    // Simplify loop values after breaking the backedge
+    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+    SmallVector<WeakTrackingVH, 16> DeadInsts;
+    for (BasicBlock *BB : RemainderBlocks) {
+      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+        Instruction *Inst = &*I++;
+        if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
+          if (LI->replacementPreservesLCSSAForm(Inst, V))
+            Inst->replaceAllUsesWith(V);
+        if (isInstructionTriviallyDead(Inst))
+          DeadInsts.emplace_back(Inst);
+      }
+      // We can't do recursive deletion until we're done iterating, as we might
+      // have a phi which (potentially indirectly) uses instructions later in
+      // the block we're iterating through.
+      RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
+    }
+
+    // Merge latch into exit block.
+    auto *ExitBB = RemainderLatch->getSingleSuccessor();
+    assert(ExitBB && "required after breaking cond br backedge");
+    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+    MergeBlockIntoPredecessor(ExitBB, &DTU, LI);
+  }
+
   // Canonicalize to LoopSimplifyForm both original and remainder loops. We
   // cannot rely on the LoopUnrollPass to do this because it only does
   // canonicalization for parent/subloops and not the sibling loops.

diff  --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
index e1ff19767087d..8c91a521c5279 100644
--- a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
@@ -22,11 +22,11 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[N_VEC]], -16
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT:    [[XTRAITER1:%.*]] = and i64 [[TMP2]], 1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP2]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP1]], 1
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]]
 ; CHECK:       vector.ph.new:
-; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TMP2]], [[XTRAITER1]]
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TMP2]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ]
@@ -61,44 +61,37 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK:       middle.block.unr-lcssa:
 ; CHECK-NEXT:    [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[VEC_IND12_UNR:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND12_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER1]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD2]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]]
 ; CHECK:       vector.body.epil.preheader:
 ; CHECK-NEXT:    br label [[VECTOR_BODY_EPIL:%.*]]
 ; CHECK:       vector.body.epil:
-; CHECK-NEXT:    [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ]
-; CHECK-NEXT:    [[VEC_IND12_EPIL:%.*]] = phi <16 x i32> [ [[VEC_IND12_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ]
-; CHECK-NEXT:    [[TMP16:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND12_EPIL]]
+; CHECK-NEXT:    [[TMP16:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND12_UNR]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <16 x i32> [[TMP16]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq <16 x i32> [[TMP17]], zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = select <16 x i1> [[TMP18]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_EPIL]]
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_UNR]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = bitcast i8* [[TMP20]] to <16 x i8>*
 ; CHECK-NEXT:    store <16 x i8> [[TMP19]], <16 x i8>* [[TMP21]], align 1
-; CHECK-NEXT:    [[INDEX_NEXT_EPIL:%.*]] = add i64 [[INDEX_EPIL]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT13_EPIL:%.*]] = add <16 x i32> [[VEC_IND12_EPIL]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT_EPIL]], [[N_VEC]]
-; CHECK-NEXT:    br label [[MIDDLE_BLOCK_EPILOG_LCSSA:%.*]]
-; CHECK:       middle.block.epilog-lcssa:
 ; CHECK-NEXT:    br label [[MIDDLE_BLOCK]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]]
 ; CHECK:       for.body.preheader:
 ; CHECK-NEXT:    [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[TMP23:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]]
-; CHECK-NEXT:    [[TMP24:%.*]] = add i64 [[WIDE_TRIP_COUNT]], -1
-; CHECK-NEXT:    [[TMP25:%.*]] = sub i64 [[TMP24]], [[INDVARS_IV_PH]]
-; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP23]], 7
-; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]]
+; CHECK-NEXT:    [[TMP22:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]]
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[WIDE_TRIP_COUNT]], -1
+; CHECK-NEXT:    [[TMP24:%.*]] = sub i64 [[TMP23]], [[INDVARS_IV_PH]]
+; CHECK-NEXT:    [[XTRAITER1:%.*]] = and i64 [[TMP22]], 7
+; CHECK-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER1]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD2]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]]
 ; CHECK:       for.body.prol.preheader:
 ; CHECK-NEXT:    br label [[FOR_BODY_PROL:%.*]]
 ; CHECK:       for.body.prol:
 ; CHECK-NEXT:    [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ]
-; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ]
-; CHECK-NEXT:    [[TMP26:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32
-; CHECK-NEXT:    [[SHL_PROL:%.*]] = shl i32 1, [[TMP26]]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER1]], [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ]
+; CHECK-NEXT:    [[TMP25:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32
+; CHECK-NEXT:    [[SHL_PROL:%.*]] = shl i32 1, [[TMP25]]
 ; CHECK-NEXT:    [[AND_PROL:%.*]] = and i32 [[SHL_PROL]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL_PROL:%.*]] = icmp eq i32 [[AND_PROL]], 0
 ; CHECK-NEXT:    [[CONV_PROL:%.*]] = select i1 [[TOBOOL_PROL]], i8 48, i8 49
@@ -108,76 +101,76 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK-NEXT:    [[EXITCOND_PROL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_PROL]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT:    [[PROL_ITER_SUB]] = sub i64 [[PROL_ITER]], 1
 ; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_SUB]], 0
-; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], [[LOOP0:!llvm.loop !.*]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       for.body.prol.loopexit.unr-lcssa:
 ; CHECK-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY_PROL_LOOPEXIT]]
 ; CHECK:       for.body.prol.loopexit:
 ; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
-; CHECK-NEXT:    [[TMP27:%.*]] = icmp ult i64 [[TMP25]], 7
-; CHECK-NEXT:    br i1 [[TMP27]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp ult i64 [[TMP24]], 7
+; CHECK-NEXT:    br i1 [[TMP26]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
 ; CHECK:       for.body.preheader.new:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 1, [[TMP28]]
+; CHECK-NEXT:    [[TMP27:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 1, [[TMP27]]
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHL]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[AND]], 0
 ; CHECK-NEXT:    [[CONV:%.*]] = select i1 [[TOBOOL]], i8 48, i8 49
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[TMP29:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT:    [[SHL_1:%.*]] = shl i32 1, [[TMP29]]
+; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[SHL_1:%.*]] = shl i32 1, [[TMP28]]
 ; CHECK-NEXT:    [[AND_1:%.*]] = and i32 [[SHL_1]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL_1:%.*]] = icmp eq i32 [[AND_1]], 0
 ; CHECK-NEXT:    [[CONV_1:%.*]] = select i1 [[TOBOOL_1]], i8 48, i8 49
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-NEXT:    store i8 [[CONV_1]], i8* [[ARRAYIDX_1]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
-; CHECK-NEXT:    [[TMP30:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32
-; CHECK-NEXT:    [[SHL_2:%.*]] = shl i32 1, [[TMP30]]
+; CHECK-NEXT:    [[TMP29:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32
+; CHECK-NEXT:    [[SHL_2:%.*]] = shl i32 1, [[TMP29]]
 ; CHECK-NEXT:    [[AND_2:%.*]] = and i32 [[SHL_2]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL_2:%.*]] = icmp eq i32 [[AND_2]], 0
 ; CHECK-NEXT:    [[CONV_2:%.*]] = select i1 [[TOBOOL_2]], i8 48, i8 49
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_1]]
 ; CHECK-NEXT:    store i8 [[CONV_2]], i8* [[ARRAYIDX_2]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
-; CHECK-NEXT:    [[TMP31:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32
-; CHECK-NEXT:    [[SHL_3:%.*]] = shl i32 1, [[TMP31]]
+; CHECK-NEXT:    [[TMP30:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32
+; CHECK-NEXT:    [[SHL_3:%.*]] = shl i32 1, [[TMP30]]
 ; CHECK-NEXT:    [[AND_3:%.*]] = and i32 [[SHL_3]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL_3:%.*]] = icmp eq i32 [[AND_3]], 0
 ; CHECK-NEXT:    [[CONV_3:%.*]] = select i1 [[TOBOOL_3]], i8 48, i8 49
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_2]]
 ; CHECK-NEXT:    store i8 [[CONV_3]], i8* [[ARRAYIDX_3]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
-; CHECK-NEXT:    [[TMP32:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32
-; CHECK-NEXT:    [[SHL_4:%.*]] = shl i32 1, [[TMP32]]
+; CHECK-NEXT:    [[TMP31:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32
+; CHECK-NEXT:    [[SHL_4:%.*]] = shl i32 1, [[TMP31]]
 ; CHECK-NEXT:    [[AND_4:%.*]] = and i32 [[SHL_4]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL_4:%.*]] = icmp eq i32 [[AND_4]], 0
 ; CHECK-NEXT:    [[CONV_4:%.*]] = select i1 [[TOBOOL_4]], i8 48, i8 49
 ; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_3]]
 ; CHECK-NEXT:    store i8 [[CONV_4]], i8* [[ARRAYIDX_4]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
-; CHECK-NEXT:    [[TMP33:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32
-; CHECK-NEXT:    [[SHL_5:%.*]] = shl i32 1, [[TMP33]]
+; CHECK-NEXT:    [[TMP32:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32
+; CHECK-NEXT:    [[SHL_5:%.*]] = shl i32 1, [[TMP32]]
 ; CHECK-NEXT:    [[AND_5:%.*]] = and i32 [[SHL_5]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL_5:%.*]] = icmp eq i32 [[AND_5]], 0
 ; CHECK-NEXT:    [[CONV_5:%.*]] = select i1 [[TOBOOL_5]], i8 48, i8 49
 ; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_4]]
 ; CHECK-NEXT:    store i8 [[CONV_5]], i8* [[ARRAYIDX_5]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
-; CHECK-NEXT:    [[TMP34:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32
-; CHECK-NEXT:    [[SHL_6:%.*]] = shl i32 1, [[TMP34]]
+; CHECK-NEXT:    [[TMP33:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32
+; CHECK-NEXT:    [[SHL_6:%.*]] = shl i32 1, [[TMP33]]
 ; CHECK-NEXT:    [[AND_6:%.*]] = and i32 [[SHL_6]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL_6:%.*]] = icmp eq i32 [[AND_6]], 0
 ; CHECK-NEXT:    [[CONV_6:%.*]] = select i1 [[TOBOOL_6]], i8 48, i8 49
 ; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_5]]
 ; CHECK-NEXT:    store i8 [[CONV_6]], i8* [[ARRAYIDX_6]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
-; CHECK-NEXT:    [[TMP35:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32
-; CHECK-NEXT:    [[SHL_7:%.*]] = shl i32 1, [[TMP35]]
+; CHECK-NEXT:    [[TMP34:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32
+; CHECK-NEXT:    [[SHL_7:%.*]] = shl i32 1, [[TMP34]]
 ; CHECK-NEXT:    [[AND_7:%.*]] = and i32 [[SHL_7]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL_7:%.*]] = icmp eq i32 [[AND_7]], 0
 ; CHECK-NEXT:    [[CONV_7:%.*]] = select i1 [[TOBOOL_7]], i8 48, i8 49

diff  --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
index aa0ead18ba3cd..20f6d91b0de36 100644
--- a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
@@ -22,11 +22,11 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[N_VEC]], -16
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT:    [[XTRAITER1:%.*]] = and i64 [[TMP2]], 1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP2]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP1]], 1
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]]
 ; CHECK:       vector.ph.new:
-; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TMP2]], [[XTRAITER1]]
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TMP2]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ]
@@ -61,44 +61,37 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK:       middle.block.unr-lcssa:
 ; CHECK-NEXT:    [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[VEC_IND12_UNR:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND12_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER1]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD2]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]]
 ; CHECK:       vector.body.epil.preheader:
 ; CHECK-NEXT:    br label [[VECTOR_BODY_EPIL:%.*]]
 ; CHECK:       vector.body.epil:
-; CHECK-NEXT:    [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ]
-; CHECK-NEXT:    [[VEC_IND12_EPIL:%.*]] = phi <16 x i32> [ [[VEC_IND12_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ]
-; CHECK-NEXT:    [[TMP16:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND12_EPIL]]
+; CHECK-NEXT:    [[TMP16:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND12_UNR]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <16 x i32> [[TMP16]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq <16 x i32> [[TMP17]], zeroinitializer
 ; CHECK-NEXT:    [[TMP19:%.*]] = select <16 x i1> [[TMP18]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_EPIL]]
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_UNR]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = bitcast i8* [[TMP20]] to <16 x i8>*
 ; CHECK-NEXT:    store <16 x i8> [[TMP19]], <16 x i8>* [[TMP21]], align 1
-; CHECK-NEXT:    [[INDEX_NEXT_EPIL:%.*]] = add i64 [[INDEX_EPIL]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT13_EPIL:%.*]] = add <16 x i32> [[VEC_IND12_EPIL]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT_EPIL]], [[N_VEC]]
-; CHECK-NEXT:    br label [[MIDDLE_BLOCK_EPILOG_LCSSA:%.*]]
-; CHECK:       middle.block.epilog-lcssa:
 ; CHECK-NEXT:    br label [[MIDDLE_BLOCK]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]]
 ; CHECK:       for.body.preheader:
 ; CHECK-NEXT:    [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[TMP23:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]]
-; CHECK-NEXT:    [[TMP24:%.*]] = add i64 [[WIDE_TRIP_COUNT]], -1
-; CHECK-NEXT:    [[TMP25:%.*]] = sub i64 [[TMP24]], [[INDVARS_IV_PH]]
-; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP23]], 7
-; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]]
+; CHECK-NEXT:    [[TMP22:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]]
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[WIDE_TRIP_COUNT]], -1
+; CHECK-NEXT:    [[TMP24:%.*]] = sub i64 [[TMP23]], [[INDVARS_IV_PH]]
+; CHECK-NEXT:    [[XTRAITER1:%.*]] = and i64 [[TMP22]], 7
+; CHECK-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER1]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD2]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]]
 ; CHECK:       for.body.prol.preheader:
 ; CHECK-NEXT:    br label [[FOR_BODY_PROL:%.*]]
 ; CHECK:       for.body.prol:
 ; CHECK-NEXT:    [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ]
-; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ]
-; CHECK-NEXT:    [[TMP26:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32
-; CHECK-NEXT:    [[SHL_PROL:%.*]] = shl i32 1, [[TMP26]]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER1]], [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ]
+; CHECK-NEXT:    [[TMP25:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32
+; CHECK-NEXT:    [[SHL_PROL:%.*]] = shl i32 1, [[TMP25]]
 ; CHECK-NEXT:    [[AND_PROL:%.*]] = and i32 [[SHL_PROL]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL_PROL:%.*]] = icmp eq i32 [[AND_PROL]], 0
 ; CHECK-NEXT:    [[CONV_PROL:%.*]] = select i1 [[TOBOOL_PROL]], i8 48, i8 49
@@ -108,76 +101,76 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK-NEXT:    [[EXITCOND_PROL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_PROL]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT:    [[PROL_ITER_SUB]] = sub i64 [[PROL_ITER]], 1
 ; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_SUB]], 0
-; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop !0
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       for.body.prol.loopexit.unr-lcssa:
 ; CHECK-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY_PROL_LOOPEXIT]]
 ; CHECK:       for.body.prol.loopexit:
 ; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
-; CHECK-NEXT:    [[TMP27:%.*]] = icmp ult i64 [[TMP25]], 7
-; CHECK-NEXT:    br i1 [[TMP27]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp ult i64 [[TMP24]], 7
+; CHECK-NEXT:    br i1 [[TMP26]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
 ; CHECK:       for.body.preheader.new:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 1, [[TMP28]]
+; CHECK-NEXT:    [[TMP27:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 1, [[TMP27]]
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHL]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[AND]], 0
 ; CHECK-NEXT:    [[CONV:%.*]] = select i1 [[TOBOOL]], i8 48, i8 49
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[TMP29:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT:    [[SHL_1:%.*]] = shl i32 1, [[TMP29]]
+; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[SHL_1:%.*]] = shl i32 1, [[TMP28]]
 ; CHECK-NEXT:    [[AND_1:%.*]] = and i32 [[SHL_1]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL_1:%.*]] = icmp eq i32 [[AND_1]], 0
 ; CHECK-NEXT:    [[CONV_1:%.*]] = select i1 [[TOBOOL_1]], i8 48, i8 49
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-NEXT:    store i8 [[CONV_1]], i8* [[ARRAYIDX_1]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
-; CHECK-NEXT:    [[TMP30:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32
-; CHECK-NEXT:    [[SHL_2:%.*]] = shl i32 1, [[TMP30]]
+; CHECK-NEXT:    [[TMP29:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32
+; CHECK-NEXT:    [[SHL_2:%.*]] = shl i32 1, [[TMP29]]
 ; CHECK-NEXT:    [[AND_2:%.*]] = and i32 [[SHL_2]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL_2:%.*]] = icmp eq i32 [[AND_2]], 0
 ; CHECK-NEXT:    [[CONV_2:%.*]] = select i1 [[TOBOOL_2]], i8 48, i8 49
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_1]]
 ; CHECK-NEXT:    store i8 [[CONV_2]], i8* [[ARRAYIDX_2]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
-; CHECK-NEXT:    [[TMP31:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32
-; CHECK-NEXT:    [[SHL_3:%.*]] = shl i32 1, [[TMP31]]
+; CHECK-NEXT:    [[TMP30:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32
+; CHECK-NEXT:    [[SHL_3:%.*]] = shl i32 1, [[TMP30]]
 ; CHECK-NEXT:    [[AND_3:%.*]] = and i32 [[SHL_3]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL_3:%.*]] = icmp eq i32 [[AND_3]], 0
 ; CHECK-NEXT:    [[CONV_3:%.*]] = select i1 [[TOBOOL_3]], i8 48, i8 49
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_2]]
 ; CHECK-NEXT:    store i8 [[CONV_3]], i8* [[ARRAYIDX_3]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
-; CHECK-NEXT:    [[TMP32:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32
-; CHECK-NEXT:    [[SHL_4:%.*]] = shl i32 1, [[TMP32]]
+; CHECK-NEXT:    [[TMP31:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32
+; CHECK-NEXT:    [[SHL_4:%.*]] = shl i32 1, [[TMP31]]
 ; CHECK-NEXT:    [[AND_4:%.*]] = and i32 [[SHL_4]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL_4:%.*]] = icmp eq i32 [[AND_4]], 0
 ; CHECK-NEXT:    [[CONV_4:%.*]] = select i1 [[TOBOOL_4]], i8 48, i8 49
 ; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_3]]
 ; CHECK-NEXT:    store i8 [[CONV_4]], i8* [[ARRAYIDX_4]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
-; CHECK-NEXT:    [[TMP33:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32
-; CHECK-NEXT:    [[SHL_5:%.*]] = shl i32 1, [[TMP33]]
+; CHECK-NEXT:    [[TMP32:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32
+; CHECK-NEXT:    [[SHL_5:%.*]] = shl i32 1, [[TMP32]]
 ; CHECK-NEXT:    [[AND_5:%.*]] = and i32 [[SHL_5]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL_5:%.*]] = icmp eq i32 [[AND_5]], 0
 ; CHECK-NEXT:    [[CONV_5:%.*]] = select i1 [[TOBOOL_5]], i8 48, i8 49
 ; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_4]]
 ; CHECK-NEXT:    store i8 [[CONV_5]], i8* [[ARRAYIDX_5]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
-; CHECK-NEXT:    [[TMP34:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32
-; CHECK-NEXT:    [[SHL_6:%.*]] = shl i32 1, [[TMP34]]
+; CHECK-NEXT:    [[TMP33:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32
+; CHECK-NEXT:    [[SHL_6:%.*]] = shl i32 1, [[TMP33]]
 ; CHECK-NEXT:    [[AND_6:%.*]] = and i32 [[SHL_6]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL_6:%.*]] = icmp eq i32 [[AND_6]], 0
 ; CHECK-NEXT:    [[CONV_6:%.*]] = select i1 [[TOBOOL_6]], i8 48, i8 49
 ; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_5]]
 ; CHECK-NEXT:    store i8 [[CONV_6]], i8* [[ARRAYIDX_6]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
-; CHECK-NEXT:    [[TMP35:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32
-; CHECK-NEXT:    [[SHL_7:%.*]] = shl i32 1, [[TMP35]]
+; CHECK-NEXT:    [[TMP34:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32
+; CHECK-NEXT:    [[SHL_7:%.*]] = shl i32 1, [[TMP34]]
 ; CHECK-NEXT:    [[AND_7:%.*]] = and i32 [[SHL_7]], [[X]]
 ; CHECK-NEXT:    [[TOBOOL_7:%.*]] = icmp eq i32 [[AND_7]], 0
 ; CHECK-NEXT:    [[CONV_7:%.*]] = select i1 [[TOBOOL_7]], i8 48, i8 49

diff  --git a/llvm/test/Transforms/LoopUnroll/revisit.ll b/llvm/test/Transforms/LoopUnroll/revisit.ll
index 4ad867fa72a4c..2cbe68f86983f 100644
--- a/llvm/test/Transforms/LoopUnroll/revisit.ll
+++ b/llvm/test/Transforms/LoopUnroll/revisit.ll
@@ -140,10 +140,10 @@ l0.0.latch:
 ;
 ; Revisit the children of the outer loop that are part of the epilogue.
 ; 
-; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.0.epil<header>
-; CHECK-NOT: LoopFullUnrollPass
 ; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.1.epil<header>
 ; CHECK-NOT: LoopFullUnrollPass
+; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.0.epil<header>
+; CHECK-NOT: LoopFullUnrollPass
 l0.latch:
   br label %l0
 ; CHECK: LoopFullUnrollPass on Loop at depth 1 containing: %l0<header>

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
index 35056394da0c7..73bf94cd7982a 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
@@ -157,26 +157,19 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK:       exit1:
 ; EPILOG-BLOCK-NEXT:    ret void
 ; EPILOG-BLOCK:       exit2.loopexit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %exit2.loopexit.unr-lcssa
 ; EPILOG-BLOCK:       exit2.loopexit.unr-lcssa:
-; EPILOG-BLOCK-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ]
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2.loopexit
 ; EPILOG-BLOCK:       loop_header.epil.preheader:
 ; EPILOG-BLOCK-NEXT:    br label %loop_header.epil
 ; EPILOG-BLOCK:       loop_header.epil:
-; EPILOG-BLOCK-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ]
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.epil, label %loop_exiting_bb1.epil
 ; EPILOG-BLOCK:       loop_exiting_bb1.epil:
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %loop_exiting_bb2.epil, label %exit1
 ; EPILOG-BLOCK:       loop_exiting_bb2.epil:
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %loop_latch.epil, label %exit3
 ; EPILOG-BLOCK:       loop_latch.epil:
-; EPILOG-BLOCK-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
-; EPILOG-BLOCK-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-BLOCK-NEXT:    br label %exit2.loopexit.epilog-lcssa
-; EPILOG-BLOCK:       exit2.loopexit.epilog-lcssa:
 ; EPILOG-BLOCK-NEXT:    br label %exit2.loopexit
 ; EPILOG-BLOCK:       exit2.loopexit:
 ; EPILOG-BLOCK-NEXT:    ret void
@@ -313,11 +306,9 @@ define void @test1(i64 %trip, i1 %cond) {
 ; PROLOG-BLOCK:       loop_exiting_bb2.prol:
 ; PROLOG-BLOCK-NEXT:    br i1 false, label %loop_latch.prol, label %exit3
 ; PROLOG-BLOCK:       loop_latch.prol:
-; PROLOG-BLOCK-NEXT:    %iv_next.prol = add i64 0, 1
-; PROLOG-BLOCK-NEXT:    %cmp.prol = icmp ne i64 %iv_next.prol, %trip
 ; PROLOG-BLOCK-NEXT:    br label %loop_header.prol.loopexit
 ; PROLOG-BLOCK:       loop_header.prol.loopexit:
-; PROLOG-BLOCK-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv_next.prol, %loop_latch.prol ]
+; PROLOG-BLOCK-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ]
 ; PROLOG-BLOCK-NEXT:    %1 = icmp ult i64 %0, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %1, label %exit2.loopexit, label %entry.new
 ; PROLOG-BLOCK:       entry.new:
@@ -568,29 +559,23 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-BLOCK:       header.epil.preheader:
 ; EPILOG-BLOCK-NEXT:    br label %header.epil
 ; EPILOG-BLOCK:       header.epil:
-; EPILOG-BLOCK-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-BLOCK-NEXT:    %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ]
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %for.exit2, label %for.exiting_block.epil
 ; EPILOG-BLOCK:       for.exiting_block.epil:
 ; EPILOG-BLOCK-NEXT:    %cmp.epil = icmp eq i64 %n, 42
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %for.exit2, label %for.body.epil
 ; EPILOG-BLOCK:       for.body.epil:
-; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
 ; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %3, %sum.02.epil
-; EPILOG-BLOCK-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-BLOCK-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
-; EPILOG-BLOCK-NEXT:    br label %for.end.epilog-lcssa
-; EPILOG-BLOCK:       for.end.epilog-lcssa:
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %3, %sum.02.unr
 ; EPILOG-BLOCK-NEXT:    br label %for.end
 ; EPILOG-BLOCK:       for.end:
-; EPILOG-BLOCK-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %for.end.unr-lcssa ], [ %add.epil, %for.end.epilog-lcssa ]
+; EPILOG-BLOCK-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %for.end.unr-lcssa ], [ %add.epil, %for.body.epil ]
 ; EPILOG-BLOCK-NEXT:    ret i32 %sum.0.lcssa
 ; EPILOG-BLOCK:       for.exit2.loopexit:
 ; EPILOG-BLOCK-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %for.exit2
 ; EPILOG-BLOCK:       for.exit2:
-; EPILOG-BLOCK-NEXT:    %retval = phi i32 [ %sum.02.epil, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ]
+; EPILOG-BLOCK-NEXT:    %retval = phi i32 [ %sum.02.unr, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ]
 ; EPILOG-BLOCK-NEXT:    ret i32 %retval
 ; EPILOG-BLOCK:       for.exiting_block.1:
 ; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
@@ -749,16 +734,12 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; PROLOG-BLOCK-NEXT:    %cmp.prol = icmp eq i64 %n, 42
 ; PROLOG-BLOCK-NEXT:    br i1 %cmp.prol, label %for.exit2, label %for.body.prol
 ; PROLOG-BLOCK:       for.body.prol:
-; PROLOG-BLOCK-NEXT:    %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0
-; PROLOG-BLOCK-NEXT:    %1 = load i32, i32* %arrayidx.prol, align 4
-; PROLOG-BLOCK-NEXT:    %add.prol = add nsw i32 %1, 0
-; PROLOG-BLOCK-NEXT:    %indvars.iv.next.prol = add i64 0, 1
-; PROLOG-BLOCK-NEXT:    %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
+; PROLOG-BLOCK-NEXT:    %1 = load i32, i32* %a, align 4
 ; PROLOG-BLOCK-NEXT:    br label %header.prol.loopexit
 ; PROLOG-BLOCK:       header.prol.loopexit:
-; PROLOG-BLOCK-NEXT:    %sum.0.lcssa.unr = phi i32 [ undef, %entry ], [ %add.prol, %for.body.prol ]
-; PROLOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %for.body.prol ]
-; PROLOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %for.body.prol ]
+; PROLOG-BLOCK-NEXT:    %sum.0.lcssa.unr = phi i32 [ undef, %entry ], [ %1, %for.body.prol ]
+; PROLOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %for.body.prol ]
+; PROLOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %1, %for.body.prol ]
 ; PROLOG-BLOCK-NEXT:    %2 = icmp ult i64 %0, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %2, label %for.end, label %entry.new
 ; PROLOG-BLOCK:       entry.new:
@@ -1006,31 +987,22 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG-BLOCK:       exit1:
 ; EPILOG-BLOCK-NEXT:    ret void
 ; EPILOG-BLOCK:       exit2.loopexit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %sum.unr.ph = phi i64 [ %sum.next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %exit2.loopexit.unr-lcssa
 ; EPILOG-BLOCK:       exit2.loopexit.unr-lcssa:
-; EPILOG-BLOCK-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ]
 ; EPILOG-BLOCK-NEXT:    %sum.unr = phi i64 [ 0, %entry ], [ %sum.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ]
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2.loopexit
 ; EPILOG-BLOCK:       loop_header.epil.preheader:
 ; EPILOG-BLOCK-NEXT:    br label %loop_header.epil
 ; EPILOG-BLOCK:       loop_header.epil:
-; EPILOG-BLOCK-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ]
-; EPILOG-BLOCK-NEXT:    %sum.epil = phi i64 [ %sum.unr, %loop_header.epil.preheader ]
 ; EPILOG-BLOCK-NEXT:    br i1 undef, label %loop_latch.epil, label %loop_exiting_bb1.epil
 ; EPILOG-BLOCK:       loop_exiting_bb1.epil:
-; EPILOG-BLOCK-NEXT:    switch i64 %sum.epil, label %loop_latch.epil [
+; EPILOG-BLOCK-NEXT:    switch i64 %sum.unr, label %loop_latch.epil [
 ; EPILOG-BLOCK-NEXT:    i64 24, label %exit1
 ; EPILOG-BLOCK-NEXT:    i64 42, label %exit3
 ; EPILOG-BLOCK-NEXT:    ]
 ; EPILOG-BLOCK:       loop_latch.epil:
-; EPILOG-BLOCK-NEXT:    %iv_next.epil = add nuw nsw i64 %iv.epil, 1
-; EPILOG-BLOCK-NEXT:    %sum.next.epil = add i64 %sum.epil, %add
-; EPILOG-BLOCK-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-BLOCK-NEXT:    br label %exit2.loopexit.epilog-lcssa
-; EPILOG-BLOCK:       exit2.loopexit.epilog-lcssa:
 ; EPILOG-BLOCK-NEXT:    br label %exit2.loopexit
 ; EPILOG-BLOCK:       exit2.loopexit:
 ; EPILOG-BLOCK-NEXT:    ret void
@@ -1192,13 +1164,10 @@ define void @test3(i64 %trip, i64 %add) {
 ; PROLOG-BLOCK-NEXT:    i64 42, label %exit3
 ; PROLOG-BLOCK-NEXT:    ]
 ; PROLOG-BLOCK:       loop_latch.prol:
-; PROLOG-BLOCK-NEXT:    %iv_next.prol = add nuw nsw i64 0, 1
-; PROLOG-BLOCK-NEXT:    %sum.next.prol = add i64 0, %add
-; PROLOG-BLOCK-NEXT:    %cmp.prol = icmp ne i64 %iv_next.prol, %trip
 ; PROLOG-BLOCK-NEXT:    br label %loop_header.prol.loopexit
 ; PROLOG-BLOCK:       loop_header.prol.loopexit:
-; PROLOG-BLOCK-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv_next.prol, %loop_latch.prol ]
-; PROLOG-BLOCK-NEXT:    %sum.unr = phi i64 [ 0, %entry ], [ %sum.next.prol, %loop_latch.prol ]
+; PROLOG-BLOCK-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ]
+; PROLOG-BLOCK-NEXT:    %sum.unr = phi i64 [ 0, %entry ], [ %add, %loop_latch.prol ]
 ; PROLOG-BLOCK-NEXT:    %1 = icmp ult i64 %0, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %1, label %exit2.loopexit, label %entry.new
 ; PROLOG-BLOCK:       entry.new:
@@ -1455,18 +1424,14 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK:       header.epil.preheader:
 ; EPILOG-BLOCK-NEXT:    br label %header.epil
 ; EPILOG-BLOCK:       header.epil:
-; EPILOG-BLOCK-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-BLOCK-NEXT:    %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ]
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa, label %for.exiting_block.epil
 ; EPILOG-BLOCK:       for.exiting_block.epil:
 ; EPILOG-BLOCK-NEXT:    %cmp.epil = icmp eq i64 %n, 42
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %for.exit2, label %latch.epil
 ; EPILOG-BLOCK:       latch.epil:
-; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
 ; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %3, %sum.02.epil
-; EPILOG-BLOCK-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-BLOCK-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %3, %sum.02.unr
 ; EPILOG-BLOCK-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG-BLOCK:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph1.ph = phi i32 [ 0, %header ], [ 0, %latch ]
@@ -1641,16 +1606,12 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    %cmp.prol = icmp eq i64 %n, 42
 ; PROLOG-BLOCK-NEXT:    br i1 %cmp.prol, label %for.exit2, label %latch.prol
 ; PROLOG-BLOCK:       latch.prol:
-; PROLOG-BLOCK-NEXT:    %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0
-; PROLOG-BLOCK-NEXT:    %1 = load i32, i32* %arrayidx.prol, align 4
-; PROLOG-BLOCK-NEXT:    %add.prol = add nsw i32 %1, 0
-; PROLOG-BLOCK-NEXT:    %indvars.iv.next.prol = add i64 0, 1
-; PROLOG-BLOCK-NEXT:    %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
+; PROLOG-BLOCK-NEXT:    %1 = load i32, i32* %a, align 4
 ; PROLOG-BLOCK-NEXT:    br label %header.prol.loopexit
 ; PROLOG-BLOCK:       header.prol.loopexit:
-; PROLOG-BLOCK-NEXT:    %result.unr = phi i32 [ undef, %entry ], [ %add.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %latch.prol ]
+; PROLOG-BLOCK-NEXT:    %result.unr = phi i32 [ undef, %entry ], [ %1, %latch.prol ]
+; PROLOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ]
+; PROLOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %1, %latch.prol ]
 ; PROLOG-BLOCK-NEXT:    %2 = icmp ult i64 %0, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %2, label %latchExit, label %entry.new
 ; PROLOG-BLOCK:       entry.new:
@@ -1907,18 +1868,14 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK:       header.epil.preheader:
 ; EPILOG-BLOCK-NEXT:    br label %header.epil
 ; EPILOG-BLOCK:       header.epil:
-; EPILOG-BLOCK-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-BLOCK-NEXT:    %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ]
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2, label %for.exiting_block.epil
 ; EPILOG-BLOCK:       for.exiting_block.epil:
 ; EPILOG-BLOCK-NEXT:    %cmp.epil = icmp eq i64 %n, 42
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa, label %latch.epil
 ; EPILOG-BLOCK:       latch.epil:
-; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
 ; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %3, %sum.02.epil
-; EPILOG-BLOCK-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-BLOCK-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %3, %sum.02.unr
 ; EPILOG-BLOCK-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG-BLOCK:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph1.ph = phi i32 [ 2, %for.exiting_block ], [ 2, %for.exiting_block.1 ]
@@ -2093,16 +2050,12 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    %cmp.prol = icmp eq i64 %n, 42
 ; PROLOG-BLOCK-NEXT:    br i1 %cmp.prol, label %latchExit.unr-lcssa, label %latch.prol
 ; PROLOG-BLOCK:       latch.prol:
-; PROLOG-BLOCK-NEXT:    %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0
-; PROLOG-BLOCK-NEXT:    %1 = load i32, i32* %arrayidx.prol, align 4
-; PROLOG-BLOCK-NEXT:    %add.prol = add nsw i32 %1, 0
-; PROLOG-BLOCK-NEXT:    %indvars.iv.next.prol = add i64 0, 1
-; PROLOG-BLOCK-NEXT:    %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
+; PROLOG-BLOCK-NEXT:    %1 = load i32, i32* %a, align 4
 ; PROLOG-BLOCK-NEXT:    br label %header.prol.loopexit
 ; PROLOG-BLOCK:       header.prol.loopexit:
-; PROLOG-BLOCK-NEXT:    %result.unr = phi i32 [ undef, %entry ], [ %add.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %latch.prol ]
+; PROLOG-BLOCK-NEXT:    %result.unr = phi i32 [ undef, %entry ], [ %1, %latch.prol ]
+; PROLOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ]
+; PROLOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %1, %latch.prol ]
 ; PROLOG-BLOCK-NEXT:    %2 = icmp ult i64 %0, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %2, label %latchExit, label %entry.new
 ; PROLOG-BLOCK:       entry.new:
@@ -2360,24 +2313,20 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK:       header.epil.preheader:
 ; EPILOG-BLOCK-NEXT:    br label %header.epil
 ; EPILOG-BLOCK:       header.epil:
-; EPILOG-BLOCK-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-BLOCK-NEXT:    %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ]
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2, label %for.exiting_block.epil
 ; EPILOG-BLOCK:       for.exiting_block.epil:
 ; EPILOG-BLOCK-NEXT:    %cmp.epil = icmp eq i64 %n, 42
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa, label %latch.epil
 ; EPILOG-BLOCK:       latch.epil:
-; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
 ; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %3, %sum.02.epil
-; EPILOG-BLOCK-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-BLOCK-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %3, %sum.02.unr
 ; EPILOG-BLOCK-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG-BLOCK:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG-BLOCK:       latchExit.epilog-lcssa:
-; EPILOG-BLOCK-NEXT:    %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.epil, %for.exiting_block.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %for.exiting_block.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ]
 ; EPILOG-BLOCK-NEXT:    br label %latchExit
 ; EPILOG-BLOCK:       latchExit:
 ; EPILOG-BLOCK-NEXT:    %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
@@ -2546,16 +2495,12 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    %cmp.prol = icmp eq i64 %n, 42
 ; PROLOG-BLOCK-NEXT:    br i1 %cmp.prol, label %latchExit.unr-lcssa, label %latch.prol
 ; PROLOG-BLOCK:       latch.prol:
-; PROLOG-BLOCK-NEXT:    %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0
-; PROLOG-BLOCK-NEXT:    %1 = load i32, i32* %arrayidx.prol, align 4
-; PROLOG-BLOCK-NEXT:    %add.prol = add nsw i32 %1, 0
-; PROLOG-BLOCK-NEXT:    %indvars.iv.next.prol = add i64 0, 1
-; PROLOG-BLOCK-NEXT:    %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
+; PROLOG-BLOCK-NEXT:    %1 = load i32, i32* %a, align 4
 ; PROLOG-BLOCK-NEXT:    br label %header.prol.loopexit
 ; PROLOG-BLOCK:       header.prol.loopexit:
-; PROLOG-BLOCK-NEXT:    %result.unr = phi i32 [ undef, %entry ], [ %add.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %latch.prol ]
+; PROLOG-BLOCK-NEXT:    %result.unr = phi i32 [ undef, %entry ], [ %1, %latch.prol ]
+; PROLOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ]
+; PROLOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %1, %latch.prol ]
 ; PROLOG-BLOCK-NEXT:    %2 = icmp ult i64 %0, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %2, label %latchExit, label %entry.new
 ; PROLOG-BLOCK:       entry.new:
@@ -2814,24 +2759,20 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK:       header.epil.preheader:
 ; EPILOG-BLOCK-NEXT:    br label %header.epil
 ; EPILOG-BLOCK:       header.epil:
-; EPILOG-BLOCK-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-BLOCK-NEXT:    %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ]
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2, label %for.exiting_block.epil
 ; EPILOG-BLOCK:       for.exiting_block.epil:
-; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
 ; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %3, %sum.02.epil
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %3, %sum.02.unr
 ; EPILOG-BLOCK-NEXT:    %cmp.epil = icmp eq i64 %n, 42
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa, label %latch.epil
 ; EPILOG-BLOCK:       latch.epil:
-; EPILOG-BLOCK-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-BLOCK-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
 ; EPILOG-BLOCK-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG-BLOCK:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG-BLOCK:       latchExit.epilog-lcssa:
-; EPILOG-BLOCK-NEXT:    %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.epil, %for.exiting_block.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %for.exiting_block.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ]
 ; EPILOG-BLOCK-NEXT:    br label %latchExit
 ; EPILOG-BLOCK:       latchExit:
 ; EPILOG-BLOCK-NEXT:    %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
@@ -2997,19 +2938,15 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-BLOCK:       header.prol:
 ; PROLOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2, label %for.exiting_block.prol
 ; PROLOG-BLOCK:       for.exiting_block.prol:
-; PROLOG-BLOCK-NEXT:    %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0
-; PROLOG-BLOCK-NEXT:    %1 = load i32, i32* %arrayidx.prol, align 4
-; PROLOG-BLOCK-NEXT:    %add.prol = add nsw i32 %1, 0
+; PROLOG-BLOCK-NEXT:    %1 = load i32, i32* %a, align 4
 ; PROLOG-BLOCK-NEXT:    %cmp.prol = icmp eq i64 %n, 42
 ; PROLOG-BLOCK-NEXT:    br i1 %cmp.prol, label %latchExit.unr-lcssa, label %latch.prol
 ; PROLOG-BLOCK:       latch.prol:
-; PROLOG-BLOCK-NEXT:    %indvars.iv.next.prol = add i64 0, 1
-; PROLOG-BLOCK-NEXT:    %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
 ; PROLOG-BLOCK-NEXT:    br label %header.prol.loopexit
 ; PROLOG-BLOCK:       header.prol.loopexit:
-; PROLOG-BLOCK-NEXT:    %result.unr = phi i32 [ undef, %entry ], [ %add.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %latch.prol ]
+; PROLOG-BLOCK-NEXT:    %result.unr = phi i32 [ undef, %entry ], [ %1, %latch.prol ]
+; PROLOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ]
+; PROLOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %1, %latch.prol ]
 ; PROLOG-BLOCK-NEXT:    %2 = icmp ult i64 %0, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %2, label %latchExit, label %entry.new
 ; PROLOG-BLOCK:       entry.new:
@@ -3324,24 +3261,18 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG-BLOCK:       loop_header.epil.preheader:
 ; EPILOG-BLOCK-NEXT:    br label %loop_header.epil
 ; EPILOG-BLOCK:       loop_header.epil:
-; EPILOG-BLOCK-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ]
-; EPILOG-BLOCK-NEXT:    %sum.epil = phi i64 [ %sum.unr, %loop_header.epil.preheader ]
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.epil, label %loop_exiting.epil
 ; EPILOG-BLOCK:       loop_exiting.epil:
-; EPILOG-BLOCK-NEXT:    %ivy.epil = add i64 %iv.epil, %add
-; EPILOG-BLOCK-NEXT:    switch i64 %sum.epil, label %loop_latch.epil [
+; EPILOG-BLOCK-NEXT:    %ivy.epil = add i64 %iv.unr, %add
+; EPILOG-BLOCK-NEXT:    switch i64 %sum.unr, label %loop_latch.epil [
 ; EPILOG-BLOCK-NEXT:    i64 24, label %exit1
 ; EPILOG-BLOCK-NEXT:    i64 42, label %exit1
 ; EPILOG-BLOCK-NEXT:    ]
 ; EPILOG-BLOCK:       loop_latch.epil:
-; EPILOG-BLOCK-NEXT:    %iv_next.epil = add nuw nsw i64 %iv.epil, 1
-; EPILOG-BLOCK-NEXT:    %sum.next.epil = add i64 %sum.epil, %add
-; EPILOG-BLOCK-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-BLOCK-NEXT:    br label %latchexit.epilog-lcssa
-; EPILOG-BLOCK:       latchexit.epilog-lcssa:
+; EPILOG-BLOCK-NEXT:    %sum.next.epil = add i64 %sum.unr, %add
 ; EPILOG-BLOCK-NEXT:    br label %latchexit
 ; EPILOG-BLOCK:       latchexit:
-; EPILOG-BLOCK-NEXT:    %sum.next.lcssa = phi i64 [ %sum.next.lcssa.ph, %latchexit.unr-lcssa ], [ %sum.next.epil, %latchexit.epilog-lcssa ]
+; EPILOG-BLOCK-NEXT:    %sum.next.lcssa = phi i64 [ %sum.next.lcssa.ph, %latchexit.unr-lcssa ], [ %sum.next.epil, %loop_latch.epil ]
 ; EPILOG-BLOCK-NEXT:    ret i64 %sum.next.lcssa
 ; EPILOG-BLOCK:       loop_exiting.1:
 ; EPILOG-BLOCK-NEXT:    %ivy.1 = add i64 %iv_next, %add
@@ -3507,20 +3438,16 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; PROLOG-BLOCK:       loop_header.prol:
 ; PROLOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.prol, label %loop_exiting.prol
 ; PROLOG-BLOCK:       loop_exiting.prol:
-; PROLOG-BLOCK-NEXT:    %ivy.prol = add i64 0, %add
 ; PROLOG-BLOCK-NEXT:    switch i64 0, label %loop_latch.prol [
 ; PROLOG-BLOCK-NEXT:    i64 24, label %exit1
 ; PROLOG-BLOCK-NEXT:    i64 42, label %exit1
 ; PROLOG-BLOCK-NEXT:    ]
 ; PROLOG-BLOCK:       loop_latch.prol:
-; PROLOG-BLOCK-NEXT:    %iv_next.prol = add nuw nsw i64 0, 1
-; PROLOG-BLOCK-NEXT:    %sum.next.prol = add i64 0, %add
-; PROLOG-BLOCK-NEXT:    %cmp.prol = icmp ne i64 %iv_next.prol, %trip
 ; PROLOG-BLOCK-NEXT:    br label %loop_header.prol.loopexit
 ; PROLOG-BLOCK:       loop_header.prol.loopexit:
-; PROLOG-BLOCK-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv_next.prol, %loop_latch.prol ]
-; PROLOG-BLOCK-NEXT:    %sum.unr = phi i64 [ 0, %entry ], [ %sum.next.prol, %loop_latch.prol ]
-; PROLOG-BLOCK-NEXT:    %sum.next.lcssa.unr = phi i64 [ undef, %entry ], [ %sum.next.prol, %loop_latch.prol ]
+; PROLOG-BLOCK-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ]
+; PROLOG-BLOCK-NEXT:    %sum.unr = phi i64 [ 0, %entry ], [ %add, %loop_latch.prol ]
+; PROLOG-BLOCK-NEXT:    %sum.next.lcssa.unr = phi i64 [ undef, %entry ], [ %add, %loop_latch.prol ]
 ; PROLOG-BLOCK-NEXT:    %1 = icmp ult i64 %0, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %1, label %latchexit, label %entry.new
 ; PROLOG-BLOCK:       entry.new:
@@ -3543,7 +3470,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ]
 ; PROLOG-BLOCK-NEXT:    br label %exit1
 ; PROLOG-BLOCK:       exit1:
-; PROLOG-BLOCK-NEXT:    %result = phi i64 [ %ivy.prol, %loop_exiting.prol ], [ %ivy.prol, %loop_exiting.prol ], [ %result.ph, %exit1.loopexit ]
+; PROLOG-BLOCK-NEXT:    %result = phi i64 [ %add, %loop_exiting.prol ], [ %add, %loop_exiting.prol ], [ %result.ph, %exit1.loopexit ]
 ; PROLOG-BLOCK-NEXT:    ret i64 %result
 ; PROLOG-BLOCK:       latchexit.unr-lcssa:
 ; PROLOG-BLOCK-NEXT:    %sum.next.lcssa.ph = phi i64 [ %sum.next.1, %loop_latch.1 ]
@@ -3779,29 +3706,23 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-BLOCK:       header.epil.preheader:
 ; EPILOG-BLOCK-NEXT:    br label %header.epil
 ; EPILOG-BLOCK:       header.epil:
-; EPILOG-BLOCK-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-BLOCK-NEXT:    %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ]
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %for.exit2, label %for.exiting_block.epil
 ; EPILOG-BLOCK:       for.exiting_block.epil:
 ; EPILOG-BLOCK-NEXT:    %cmp.epil = icmp eq i64 %n, 42
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %for.exit2, label %latch.epil
 ; EPILOG-BLOCK:       latch.epil:
-; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
 ; EPILOG-BLOCK-NEXT:    %load.epil = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %load.epil, %sum.02.epil
-; EPILOG-BLOCK-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-BLOCK-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
-; EPILOG-BLOCK-NEXT:    br label %latch_exit.epilog-lcssa
-; EPILOG-BLOCK:       latch_exit.epilog-lcssa:
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %load.epil, %sum.02.unr
 ; EPILOG-BLOCK-NEXT:    br label %latch_exit
 ; EPILOG-BLOCK:       latch_exit:
-; EPILOG-BLOCK-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ], [ %add.epil, %latch_exit.epilog-lcssa ]
+; EPILOG-BLOCK-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ], [ %add.epil, %latch.epil ]
 ; EPILOG-BLOCK-NEXT:    ret i32 %sum.0.lcssa
 ; EPILOG-BLOCK:       for.exit2.loopexit:
 ; EPILOG-BLOCK-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %for.exit2
 ; EPILOG-BLOCK:       for.exit2:
-; EPILOG-BLOCK-NEXT:    %retval = phi i32 [ %sum.02.epil, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ]
+; EPILOG-BLOCK-NEXT:    %retval = phi i32 [ %sum.02.unr, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ]
 ; EPILOG-BLOCK-NEXT:    %addx = add i32 %retval, %x
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %exit_true, label %exit_false
 ; EPILOG-BLOCK:       exit_true:
@@ -3970,16 +3891,12 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; PROLOG-BLOCK-NEXT:    %cmp.prol = icmp eq i64 %n, 42
 ; PROLOG-BLOCK-NEXT:    br i1 %cmp.prol, label %for.exit2, label %latch.prol
 ; PROLOG-BLOCK:       latch.prol:
-; PROLOG-BLOCK-NEXT:    %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0
-; PROLOG-BLOCK-NEXT:    %load.prol = load i32, i32* %arrayidx.prol, align 4
-; PROLOG-BLOCK-NEXT:    %add.prol = add nsw i32 %load.prol, 0
-; PROLOG-BLOCK-NEXT:    %indvars.iv.next.prol = add i64 0, 1
-; PROLOG-BLOCK-NEXT:    %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
+; PROLOG-BLOCK-NEXT:    %load.prol = load i32, i32* %a, align 4
 ; PROLOG-BLOCK-NEXT:    br label %header.prol.loopexit
 ; PROLOG-BLOCK:       header.prol.loopexit:
-; PROLOG-BLOCK-NEXT:    %sum.0.lcssa.unr = phi i32 [ undef, %entry ], [ %add.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %latch.prol ]
+; PROLOG-BLOCK-NEXT:    %sum.0.lcssa.unr = phi i32 [ undef, %entry ], [ %load.prol, %latch.prol ]
+; PROLOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ]
+; PROLOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %load.prol, %latch.prol ]
 ; PROLOG-BLOCK-NEXT:    %1 = icmp ult i64 %0, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %1, label %latch_exit, label %entry.new
 ; PROLOG-BLOCK:       entry.new:
@@ -4175,22 +4092,15 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.1
 ; EPILOG-BLOCK:       latchexit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    %i6.unr.ph = phi i64 [ %add.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %latchexit.unr-lcssa
 ; EPILOG-BLOCK:       latchexit.unr-lcssa:
-; EPILOG-BLOCK-NEXT:    %i6.unr = phi i64 [ 1, %preheader ], [ %i6.unr.ph, %latchexit.unr-lcssa.loopexit ]
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchexit
 ; EPILOG-BLOCK:       header.epil.preheader:
 ; EPILOG-BLOCK-NEXT:    br label %header.epil
 ; EPILOG-BLOCK:       header.epil:
-; EPILOG-BLOCK-NEXT:    %i6.epil = phi i64 [ %i6.unr, %header.epil.preheader ]
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %loopexit1, label %latch.epil
 ; EPILOG-BLOCK:       latch.epil:
-; EPILOG-BLOCK-NEXT:    %add.epil = add nuw nsw i64 %i6.epil, 1
-; EPILOG-BLOCK-NEXT:    %i9.epil = icmp slt i64 %add.epil, %sext
-; EPILOG-BLOCK-NEXT:    br label %latchexit.epilog-lcssa
-; EPILOG-BLOCK:       latchexit.epilog-lcssa:
 ; EPILOG-BLOCK-NEXT:    br label %latchexit
 ; EPILOG-BLOCK:       latchexit:
 ; EPILOG-BLOCK-NEXT:    unreachable
@@ -4302,11 +4212,9 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; PROLOG-BLOCK:       header.prol:
 ; PROLOG-BLOCK-NEXT:    br i1 false, label %loopexit1, label %latch.prol
 ; PROLOG-BLOCK:       latch.prol:
-; PROLOG-BLOCK-NEXT:    %add.prol = add nuw nsw i64 1, 1
-; PROLOG-BLOCK-NEXT:    %i9.prol = icmp slt i64 %add.prol, %sext
 ; PROLOG-BLOCK-NEXT:    br label %header.prol.loopexit
 ; PROLOG-BLOCK:       header.prol.loopexit:
-; PROLOG-BLOCK-NEXT:    %i6.unr = phi i64 [ 1, %preheader ], [ %add.prol, %latch.prol ]
+; PROLOG-BLOCK-NEXT:    %i6.unr = phi i64 [ 1, %preheader ], [ 2, %latch.prol ]
 ; PROLOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %2, label %latchexit, label %preheader.new
 ; PROLOG-BLOCK:       preheader.new:
@@ -4512,31 +4420,29 @@ define void @test8() {
 ; PROLOG-BLOCK:       outerloop.loopexit.loopexit:
 ; PROLOG-BLOCK-NEXT:    br label %outerloop.loopexit
 ; PROLOG-BLOCK:       outerloop.loopexit:
-; PROLOG-BLOCK-NEXT:    br i1 false, label %innerH.prol.preheader.1, label %innerH.prol.loopexit.unr-lcssa.1
+; PROLOG-BLOCK-NEXT:    br i1 false, label %innerH.prol.preheader.1, label %innerH.prol.loopexit.1
 ; PROLOG-BLOCK:       outerloop:
 ; PROLOG-BLOCK-NEXT:    %i = phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit.1 ]
 ; PROLOG-BLOCK-NEXT:    %0 = sub i64 100, %i
 ; PROLOG-BLOCK-NEXT:    %1 = sub i64 99, %i
 ; PROLOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
 ; PROLOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
-; PROLOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %innerH.prol.preheader, label %innerH.prol.loopexit.unr-lcssa
+; PROLOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %innerH.prol.preheader, label %innerH.prol.loopexit
 ; PROLOG-BLOCK:       innerH.prol.preheader:
 ; PROLOG-BLOCK-NEXT:    br label %innerH.prol
 ; PROLOG-BLOCK:       innerH.prol:
 ; PROLOG-BLOCK-NEXT:    %i4.prol = add nuw nsw i64 %i, 1
 ; PROLOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit, label %latch.prol
 ; PROLOG-BLOCK:       latch.prol:
-; PROLOG-BLOCK-NEXT:    br label %innerH.prol.loopexit.unr-lcssa
-; PROLOG-BLOCK:       innerH.prol.loopexit.unr-lcssa:
-; PROLOG-BLOCK-NEXT:    %i3.unr.ph = phi i64 [ %i4.prol, %latch.prol ], [ %i, %outerloop ]
 ; PROLOG-BLOCK-NEXT:    br label %innerH.prol.loopexit
 ; PROLOG-BLOCK:       innerH.prol.loopexit:
+; PROLOG-BLOCK-NEXT:    %i3.unr = phi i64 [ %i, %outerloop ], [ %i4.prol, %latch.prol ]
 ; PROLOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %2, label %exit.loopexit, label %outerloop.new
 ; PROLOG-BLOCK:       outerloop.new:
 ; PROLOG-BLOCK-NEXT:    br label %innerH
 ; PROLOG-BLOCK:       innerH:
-; PROLOG-BLOCK-NEXT:    %i3 = phi i64 [ %i3.unr.ph, %outerloop.new ], [ %i4.1, %latch.1 ]
+; PROLOG-BLOCK-NEXT:    %i3 = phi i64 [ %i3.unr, %outerloop.new ], [ %i4.1, %latch.1 ]
 ; PROLOG-BLOCK-NEXT:    %i4 = add nuw nsw i64 %i3, 1
 ; PROLOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch
 ; PROLOG-BLOCK:       latch:
@@ -4560,16 +4466,14 @@ define void @test8() {
 ; PROLOG-BLOCK:       innerH.prol.1:
 ; PROLOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.1, label %latch.prol.1
 ; PROLOG-BLOCK:       latch.prol.1:
-; PROLOG-BLOCK-NEXT:    br label %innerH.prol.loopexit.unr-lcssa.1
-; PROLOG-BLOCK:       innerH.prol.loopexit.unr-lcssa.1:
-; PROLOG-BLOCK-NEXT:    %i3.unr.ph.1 = phi i64 [ 1, %latch.prol.1 ], [ 0, %outerloop.loopexit ]
 ; PROLOG-BLOCK-NEXT:    br label %innerH.prol.loopexit.1
 ; PROLOG-BLOCK:       innerH.prol.loopexit.1:
+; PROLOG-BLOCK-NEXT:    %i3.unr.1 = phi i64 [ 0, %outerloop.loopexit ], [ 1, %latch.prol.1 ]
 ; PROLOG-BLOCK-NEXT:    br i1 false, label %exit.loopexit, label %outerloop.new.1
 ; PROLOG-BLOCK:       outerloop.new.1:
 ; PROLOG-BLOCK-NEXT:    br label %innerH.1
 ; PROLOG-BLOCK:       innerH.1:
-; PROLOG-BLOCK-NEXT:    %i3.1 = phi i64 [ %i3.unr.ph.1, %outerloop.new.1 ], [ %i4.1.1, %latch.1.1 ]
+; PROLOG-BLOCK-NEXT:    %i3.1 = phi i64 [ %i3.unr.1, %outerloop.new.1 ], [ %i4.1.1, %latch.1.1 ]
 ; PROLOG-BLOCK-NEXT:    %i4.11 = add nuw nsw i64 %i3.1, 1
 ; PROLOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.12
 ; PROLOG-BLOCK:       latch.12:
@@ -4799,23 +4703,21 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; PROLOG-BLOCK:       preheader:
 ; PROLOG-BLOCK-NEXT:    %xtraiter = and i32 %0, 1
 ; PROLOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i32 %xtraiter, 0
-; PROLOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit.unr-lcssa
+; PROLOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit
 ; PROLOG-BLOCK:       header.prol.preheader:
 ; PROLOG-BLOCK-NEXT:    br label %header.prol
 ; PROLOG-BLOCK:       header.prol:
 ; PROLOG-BLOCK-NEXT:    br i1 true, label %latch.prol, label %innerexit.loopexit1
 ; PROLOG-BLOCK:       latch.prol:
-; PROLOG-BLOCK-NEXT:    br label %header.prol.loopexit.unr-lcssa
-; PROLOG-BLOCK:       header.prol.loopexit.unr-lcssa:
-; PROLOG-BLOCK-NEXT:    %phi.unr.ph = phi i64 [ 1, %latch.prol ], [ 0, %preheader ]
 ; PROLOG-BLOCK-NEXT:    br label %header.prol.loopexit
 ; PROLOG-BLOCK:       header.prol.loopexit:
+; PROLOG-BLOCK-NEXT:    %phi.unr = phi i64 [ 0, %preheader ], [ 1, %latch.prol ]
 ; PROLOG-BLOCK-NEXT:    %2 = icmp ult i32 %1, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %2, label %outerLatch.loopexit, label %preheader.new
 ; PROLOG-BLOCK:       preheader.new:
 ; PROLOG-BLOCK-NEXT:    br label %header
 ; PROLOG-BLOCK:       header:
-; PROLOG-BLOCK-NEXT:    %phi = phi i64 [ %phi.unr.ph, %preheader.new ], [ %iv.next.1, %latch.1 ]
+; PROLOG-BLOCK-NEXT:    %phi = phi i64 [ %phi.unr, %preheader.new ], [ %iv.next.1, %latch.1 ]
 ; PROLOG-BLOCK-NEXT:    br i1 true, label %latch, label %innerexit.loopexit.loopexit
 ; PROLOG-BLOCK:       innerexit.loopexit.loopexit:
 ; PROLOG-BLOCK-NEXT:    %trip.lcssa.ph.ph = phi i32 [ %trip, %latch ], [ %trip, %header ]
@@ -4853,23 +4755,21 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; PROLOG-BLOCK:       preheader.1:
 ; PROLOG-BLOCK-NEXT:    %xtraiter.1 = and i32 %0, 1
 ; PROLOG-BLOCK-NEXT:    %lcmp.mod.1 = icmp ne i32 %xtraiter.1, 0
-; PROLOG-BLOCK-NEXT:    br i1 %lcmp.mod.1, label %header.prol.preheader.1, label %header.prol.loopexit.unr-lcssa.1
+; PROLOG-BLOCK-NEXT:    br i1 %lcmp.mod.1, label %header.prol.preheader.1, label %header.prol.loopexit.1
 ; PROLOG-BLOCK:       header.prol.preheader.1:
 ; PROLOG-BLOCK-NEXT:    br label %header.prol.1
 ; PROLOG-BLOCK:       header.prol.1:
 ; PROLOG-BLOCK-NEXT:    br i1 true, label %latch.prol.1, label %innerexit.loopexit1
 ; PROLOG-BLOCK:       latch.prol.1:
-; PROLOG-BLOCK-NEXT:    br label %header.prol.loopexit.unr-lcssa.1
-; PROLOG-BLOCK:       header.prol.loopexit.unr-lcssa.1:
-; PROLOG-BLOCK-NEXT:    %phi.unr.ph.1 = phi i64 [ 1, %latch.prol.1 ], [ 0, %preheader.1 ]
 ; PROLOG-BLOCK-NEXT:    br label %header.prol.loopexit.1
 ; PROLOG-BLOCK:       header.prol.loopexit.1:
+; PROLOG-BLOCK-NEXT:    %phi.unr.1 = phi i64 [ 0, %preheader.1 ], [ 1, %latch.prol.1 ]
 ; PROLOG-BLOCK-NEXT:    %3 = icmp ult i32 %1, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %3, label %outerLatch.loopexit.1, label %preheader.new.1
 ; PROLOG-BLOCK:       preheader.new.1:
 ; PROLOG-BLOCK-NEXT:    br label %header.1
 ; PROLOG-BLOCK:       header.1:
-; PROLOG-BLOCK-NEXT:    %phi.1 = phi i64 [ %phi.unr.ph.1, %preheader.new.1 ], [ %iv.next.1.1, %latch.1.1 ]
+; PROLOG-BLOCK-NEXT:    %phi.1 = phi i64 [ %phi.unr.1, %preheader.new.1 ], [ %iv.next.1.1, %latch.1.1 ]
 ; PROLOG-BLOCK-NEXT:    br i1 true, label %latch.14, label %innerexit.loopexit.loopexit5
 ; PROLOG-BLOCK:       latch.14:
 ; PROLOG-BLOCK-NEXT:    %iv.next.13 = add nuw nsw i64 %phi.1, 1

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll
index b56da618d5d67..9c0ac5bc86c7d 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -15,9 +15,9 @@
 ; EPILOG: for.body.epil.preheader:
 ; EPILOG:   br label %for.body.epil, !dbg [[PH_LOC]]
 ; EPILOG: for.body.epil:
-; EPILOG:   br label %for.end.loopexit.epilog-lcssa, !dbg [[PH_LOC]]
+; EPILOG:   br label %for.end.loopexit, !dbg [[EXIT_LOC:![0-9]+]]
 ; EPILOG: for.end.loopexit:
-; EPILOG:   br label %for.end, !dbg [[EXIT_LOC:![0-9]+]]
+; EPILOG:   br label %for.end, !dbg [[EXIT_LOC]]
 
 ; EPILOG-DAG: [[PH_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
 ; EPILOG-DAG: [[EXIT_LOC]] = !DILocation(line: 103, column: 1, scope: !{{.*}})

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
index b7e13931d1aed..888656387dc3e 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
@@ -35,7 +35,7 @@ define dso_local void @assumeDivisibleTC(i8* noalias nocapture %a, i8* noalias n
 ; CHECK-NEXT:    store i8 [[ADD_1]], i8* [[ARRAYIDX4_1]], align 1
 ; CHECK-NEXT:    [[INC_1]] = add nuw nsw i32 [[INC]], 1
 ; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp slt i32 [[INC_1]], [[N]]
-; CHECK-NEXT:    br i1 [[CMP1_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]], [[LOOP0:!llvm.loop !.*]]
+; CHECK-NEXT:    br i1 [[CMP1_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       exit.loopexit:
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
@@ -113,7 +113,7 @@ define dso_local void @cannotProveDivisibleTC(i8* noalias nocapture %a, i8* noal
 ; CHECK-NEXT:    [[INC_1]] = add nuw nsw i32 [[INC]], 1
 ; CHECK-NEXT:    [[NITER_NSUB_1]] = sub i32 [[NITER_NSUB]], 1
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp ne i32 [[NITER_NSUB_1]], 0
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], [[LOOP2:!llvm.loop !.*]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       exit.loopexit.unr-lcssa.loopexit:
 ; CHECK-NEXT:    [[I_011_UNR_PH:%.*]] = phi i32 [ [[INC_1]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    br label [[EXIT_LOOPEXIT_UNR_LCSSA]]
@@ -124,16 +124,11 @@ define dso_local void @cannotProveDivisibleTC(i8* noalias nocapture %a, i8* noal
 ; CHECK:       for.body.epil.preheader:
 ; CHECK-NEXT:    br label [[FOR_BODY_EPIL:%.*]]
 ; CHECK:       for.body.epil:
-; CHECK-NEXT:    [[I_011_EPIL:%.*]] = phi i32 [ [[I_011_UNR]], [[FOR_BODY_EPIL_PREHEADER]] ]
-; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 [[I_011_EPIL]]
+; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 [[I_011_UNR]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, i8* [[ARRAYIDX_EPIL]], align 1
 ; CHECK-NEXT:    [[ADD_EPIL:%.*]] = add i8 [[TMP4]], 3
-; CHECK-NEXT:    [[ARRAYIDX4_EPIL:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 [[I_011_EPIL]]
+; CHECK-NEXT:    [[ARRAYIDX4_EPIL:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 [[I_011_UNR]]
 ; CHECK-NEXT:    store i8 [[ADD_EPIL]], i8* [[ARRAYIDX4_EPIL]], align 1
-; CHECK-NEXT:    [[INC_EPIL:%.*]] = add nuw nsw i32 [[I_011_EPIL]], 1
-; CHECK-NEXT:    [[CMP1_EPIL:%.*]] = icmp slt i32 [[INC_EPIL]], [[N]]
-; CHECK-NEXT:    br label [[EXIT_LOOPEXIT_EPILOG_LCSSA:%.*]]
-; CHECK:       exit.loopexit.epilog-lcssa:
 ; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
 ; CHECK:       exit.loopexit:
 ; CHECK-NEXT:    br label [[EXIT]]


        


More information about the llvm-commits mailing list