[llvm] de2fed6 - [unroll] Keep unrolled iterations with initial iteration

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 12 11:41:24 PST 2021


Author: Philip Reames
Date: 2021-11-12T11:40:50-08:00
New Revision: de2fed61528a5584dc54c47f6754408597be24de

URL: https://github.com/llvm/llvm-project/commit/de2fed61528a5584dc54c47f6754408597be24de
DIFF: https://github.com/llvm/llvm-project/commit/de2fed61528a5584dc54c47f6754408597be24de.diff

LOG: [unroll] Keep unrolled iterations with initial iteration

The unrolling code was previously inserting new cloned blocks at the end of the function.  The result of this with typical loop structures is that the new iterations are placed far from the initial iteration.

With unrolling, the general assumption is that the a) the loop is reasonable hot, and b) the first Count-1 copies of the loop are rarely (if ever) loop exiting.  As such, placing Count-1 copies out of line is a fairly poor code placement choice.  We'd much rather fall through into the hot (non-exiting) path.  For code with branch profiles, later layout would fix this, but this may have a positive impact on non-PGO compiled code.

However, the real motivation for this change isn't performance.  Its readability and human understanding.  Having to jump around long distances in an IR file to trace an unrolled loop structure is error prone and tedious.

Added: 
    

Modified: 
    llvm/lib/Transforms/Utils/LoopUnroll.cpp
    llvm/test/DebugInfo/unrolled-loop-remainder.ll
    llvm/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
    llvm/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll
    llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
    llvm/test/Transforms/LoopUnroll/AArch64/thresholdO3-cost-model.ll
    llvm/test/Transforms/LoopUnroll/AArch64/unroll-upperbound.ll
    llvm/test/Transforms/LoopUnroll/ARM/loop-unrolling.ll
    llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll
    llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll
    llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
    llvm/test/Transforms/LoopUnroll/full-unroll-one-unpredictable-exit.ll
    llvm/test/Transforms/LoopUnroll/multiple-exits.ll
    llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll
    llvm/test/Transforms/LoopUnroll/partial-unroll-non-latch-exit.ll
    llvm/test/Transforms/LoopUnroll/partially-unroll-unconditional-latch.ll
    llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll
    llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
    llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
    llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
    llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
    llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll
    llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
    llvm/test/Transforms/LoopUnroll/scevunroll.ll
    llvm/test/Transforms/LoopUnroll/shifted-tripcount.ll
    llvm/test/Transforms/LoopUnroll/unroll-header-exiting-with-phis-multiple-exiting-blocks.ll
    llvm/test/Transforms/LoopUnroll/unroll-unconditional-latch.ll
    llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
    llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index ce463927fd50c..b0c622b98d5e4 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -514,6 +514,10 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
   SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
   identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
 
+  // We place the unrolled iterations immediately after the original loop
+  // latch.  This is a reasonable default placement if we don't have block
+  // frequencies, and if we do, well the layout will be adjusted later.
+  auto BlockInsertPt = std::next(LatchBlock->getIterator());
   for (unsigned It = 1; It != ULO.Count; ++It) {
     SmallVector<BasicBlock *, 8> NewBlocks;
     SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
@@ -522,7 +526,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
     for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
       ValueToValueMapTy VMap;
       BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
-      Header->getParent()->getBasicBlockList().push_back(New);
+      Header->getParent()->getBasicBlockList().insert(BlockInsertPt, New);
 
       assert((*BB != Header || LI->getLoopFor(*BB) == L) &&
              "Header should not be in a sub-loop");

diff  --git a/llvm/test/DebugInfo/unrolled-loop-remainder.ll b/llvm/test/DebugInfo/unrolled-loop-remainder.ll
index 83c30dec780d6..ba4ce1f409f69 100644
--- a/llvm/test/DebugInfo/unrolled-loop-remainder.ll
+++ b/llvm/test/DebugInfo/unrolled-loop-remainder.ll
@@ -38,71 +38,71 @@ define i32 @func_c() local_unnamed_addr #0 !dbg !14 {
 ; CHECK-NEXT:    [[PROL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1, !dbg [[DBG24]]
 ; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_SUB]], 0, !dbg [[DBG24]]
 ; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL_1:%.*]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !dbg [[DBG24]]
+; CHECK:       for.body.prol.1:
+; CHECK-NEXT:    [[ARRAYIDX_PROL_1:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 1, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_PROL_1]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT:    [[CONV_PROL_1:%.*]] = sext i32 [[TMP7]] to i64, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[CONV_PROL_1]] to i32*, !dbg [[DBG28]]
+; CHECK-NEXT:    [[ADD_PROL_1:%.*]] = add nsw i32 [[ADD_PROL]], 2, !dbg [[DBG29]]
+; CHECK-NEXT:    [[PROL_ITER_SUB_1:%.*]] = sub i32 [[PROL_ITER_SUB]], 1, !dbg [[DBG24]]
+; CHECK-NEXT:    [[PROL_ITER_CMP_1:%.*]] = icmp ne i32 [[PROL_ITER_SUB_1]], 0, !dbg [[DBG24]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP_1]], label [[FOR_BODY_PROL_2:%.*]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]], !dbg [[DBG24]]
+; CHECK:       for.body.prol.2:
+; CHECK-NEXT:    [[ARRAYIDX_PROL_2:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i64 1, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_PROL_2]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT:    [[CONV_PROL_2:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP10:%.*]] = inttoptr i64 [[CONV_PROL_2]] to i32*, !dbg [[DBG28]]
+; CHECK-NEXT:    [[ADD_PROL_2:%.*]] = add nsw i32 [[ADD_PROL_1]], 2, !dbg [[DBG29]]
+; CHECK-NEXT:    br label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]]
 ; CHECK:       for.body.prol.loopexit.unr-lcssa:
-; CHECK-NEXT:    [[DOTLCSSA_UNR_PH:%.*]] = phi i32* [ [[TMP6]], [[FOR_BODY_PROL]] ], [ [[TMP20:%.*]], [[FOR_BODY_PROL_1]] ], [ [[TMP22:%.*]], [[FOR_BODY_PROL_2:%.*]] ]
-; CHECK-NEXT:    [[DOTUNR_PH:%.*]] = phi i32* [ [[TMP6]], [[FOR_BODY_PROL]] ], [ [[TMP20]], [[FOR_BODY_PROL_1]] ], [ [[TMP22]], [[FOR_BODY_PROL_2]] ]
-; CHECK-NEXT:    [[DOTUNR1_PH:%.*]] = phi i32 [ [[ADD_PROL]], [[FOR_BODY_PROL]] ], [ [[ADD_PROL_1:%.*]], [[FOR_BODY_PROL_1]] ], [ [[ADD_PROL_2:%.*]], [[FOR_BODY_PROL_2]] ]
+; CHECK-NEXT:    [[DOTLCSSA_UNR_PH:%.*]] = phi i32* [ [[TMP6]], [[FOR_BODY_PROL]] ], [ [[TMP8]], [[FOR_BODY_PROL_1]] ], [ [[TMP10]], [[FOR_BODY_PROL_2]] ]
+; CHECK-NEXT:    [[DOTUNR_PH:%.*]] = phi i32* [ [[TMP6]], [[FOR_BODY_PROL]] ], [ [[TMP8]], [[FOR_BODY_PROL_1]] ], [ [[TMP10]], [[FOR_BODY_PROL_2]] ]
+; CHECK-NEXT:    [[DOTUNR1_PH:%.*]] = phi i32 [ [[ADD_PROL]], [[FOR_BODY_PROL]] ], [ [[ADD_PROL_1]], [[FOR_BODY_PROL_1]] ], [ [[ADD_PROL_2]], [[FOR_BODY_PROL_2]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY_PROL_LOOPEXIT]], !dbg [[DBG24]]
 ; CHECK:       for.body.prol.loopexit:
 ; CHECK-NEXT:    [[DOTLCSSA_UNR:%.*]] = phi i32* [ undef, [[FOR_BODY_LR_PH]] ], [ [[DOTLCSSA_UNR_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
 ; CHECK-NEXT:    [[DOTUNR:%.*]] = phi i32* [ [[A_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[DOTUNR_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
 ; CHECK-NEXT:    [[DOTUNR1:%.*]] = phi i32 [ [[DOTPR]], [[FOR_BODY_LR_PH]] ], [ [[DOTUNR1_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[TMP3]], 3, !dbg [[DBG24]]
-; CHECK-NEXT:    br i1 [[TMP7]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]], !dbg [[DBG24]]
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i32 [[TMP3]], 3, !dbg [[DBG24]]
+; CHECK-NEXT:    br i1 [[TMP11]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]], !dbg [[DBG24]]
 ; CHECK:       for.body.lr.ph.new:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]], !dbg [[DBG24]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[TMP8:%.*]] = phi i32* [ [[DOTUNR]], [[FOR_BODY_LR_PH_NEW]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ], !dbg [[DBG28]]
-; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ [[DOTUNR1]], [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i64 1, !dbg [[DBG28]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
-; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG28]]
-; CHECK-NEXT:    [[TMP11:%.*]] = inttoptr i64 [[CONV]] to i32*, !dbg [[DBG28]]
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP9]], 2, !dbg [[DBG29]]
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i64 1, !dbg [[DBG28]]
-; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
-; CHECK-NEXT:    [[CONV_1:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG28]]
-; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[CONV_1]] to i32*, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP12:%.*]] = phi i32* [ [[DOTUNR]], [[FOR_BODY_LR_PH_NEW]] ], [ [[TMP21:%.*]], [[FOR_BODY]] ], !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP13:%.*]] = phi i32 [ [[DOTUNR1]], [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i64 1, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP15:%.*]] = inttoptr i64 [[CONV]] to i32*, !dbg [[DBG28]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP13]], 2, !dbg [[DBG29]]
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i64 1, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT:    [[CONV_1:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP17:%.*]] = inttoptr i64 [[CONV_1]] to i32*, !dbg [[DBG28]]
 ; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[ADD]], 2, !dbg [[DBG29]]
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 1, !dbg [[DBG28]]
-; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
-; CHECK-NEXT:    [[CONV_2:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG28]]
-; CHECK-NEXT:    [[TMP15:%.*]] = inttoptr i64 [[CONV_2]] to i32*, !dbg [[DBG28]]
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 1, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT:    [[CONV_2:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP19:%.*]] = inttoptr i64 [[CONV_2]] to i32*, !dbg [[DBG28]]
 ; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[ADD_1]], 2, !dbg [[DBG29]]
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i64 1, !dbg [[DBG28]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
-; CHECK-NEXT:    [[CONV_3:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG28]]
-; CHECK-NEXT:    [[TMP17]] = inttoptr i64 [[CONV_3]] to i32*, !dbg [[DBG28]]
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i64 1, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT:    [[CONV_3:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP21]] = inttoptr i64 [[CONV_3]] to i32*, !dbg [[DBG28]]
 ; CHECK-NEXT:    [[ADD_3]] = add nsw i32 [[ADD_2]], 2, !dbg [[DBG29]]
 ; CHECK-NEXT:    [[TOBOOL_3:%.*]] = icmp eq i32 [[ADD_3]], 0, !dbg [[DBG24]]
 ; CHECK-NEXT:    br i1 [[TOBOOL_3]], label [[FOR_COND_FOR_END_CRIT_EDGE_UNR_LCSSA:%.*]], label [[FOR_BODY]], !dbg [[DBG24]], !llvm.loop [[LOOP30:![0-9]+]]
 ; CHECK:       for.cond.for.end_crit_edge.unr-lcssa:
-; CHECK-NEXT:    [[DOTLCSSA_PH:%.*]] = phi i32* [ [[TMP17]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[DOTLCSSA_PH:%.*]] = phi i32* [ [[TMP21]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    br label [[FOR_COND_FOR_END_CRIT_EDGE]], !dbg [[DBG24]]
 ; CHECK:       for.cond.for.end_crit_edge:
 ; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32* [ [[DOTLCSSA_UNR]], [[FOR_BODY_PROL_LOOPEXIT]] ], [ [[DOTLCSSA_PH]], [[FOR_COND_FOR_END_CRIT_EDGE_UNR_LCSSA]] ], !dbg [[DBG28]]
-; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP2]], 2, !dbg [[DBG24]]
+; CHECK-NEXT:    [[TMP22:%.*]] = add i32 [[TMP2]], 2, !dbg [[DBG24]]
 ; CHECK-NEXT:    store i32* [[DOTLCSSA]], i32** @a, align 8, !dbg [[DBG25]], !tbaa [[TBAA26]]
-; CHECK-NEXT:    store i32 [[TMP18]], i32* @b, align 4, !dbg [[DBG33:![0-9]+]], !tbaa [[TBAA20]]
+; CHECK-NEXT:    store i32 [[TMP22]], i32* @b, align 4, !dbg [[DBG33:![0-9]+]], !tbaa [[TBAA20]]
 ; CHECK-NEXT:    br label [[FOR_END]], !dbg [[DBG24]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret i32 undef, !dbg [[DBG34:![0-9]+]]
-; CHECK:       for.body.prol.1:
-; CHECK-NEXT:    [[ARRAYIDX_PROL_1:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 1, !dbg [[DBG28]]
-; CHECK-NEXT:    [[TMP19:%.*]] = load i32, i32* [[ARRAYIDX_PROL_1]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
-; CHECK-NEXT:    [[CONV_PROL_1:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG28]]
-; CHECK-NEXT:    [[TMP20]] = inttoptr i64 [[CONV_PROL_1]] to i32*, !dbg [[DBG28]]
-; CHECK-NEXT:    [[ADD_PROL_1]] = add nsw i32 [[ADD_PROL]], 2, !dbg [[DBG29]]
-; CHECK-NEXT:    [[PROL_ITER_SUB_1:%.*]] = sub i32 [[PROL_ITER_SUB]], 1, !dbg [[DBG24]]
-; CHECK-NEXT:    [[PROL_ITER_CMP_1:%.*]] = icmp ne i32 [[PROL_ITER_SUB_1]], 0, !dbg [[DBG24]]
-; CHECK-NEXT:    br i1 [[PROL_ITER_CMP_1]], label [[FOR_BODY_PROL_2]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]], !dbg [[DBG24]]
-; CHECK:       for.body.prol.2:
-; CHECK-NEXT:    [[ARRAYIDX_PROL_2:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 1, !dbg [[DBG28]]
-; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[ARRAYIDX_PROL_2]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
-; CHECK-NEXT:    [[CONV_PROL_2:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG28]]
-; CHECK-NEXT:    [[TMP22]] = inttoptr i64 [[CONV_PROL_2]] to i32*, !dbg [[DBG28]]
-; CHECK-NEXT:    [[ADD_PROL_2]] = add nsw i32 [[ADD_PROL_1]], 2, !dbg [[DBG29]]
-; CHECK-NEXT:    br label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]]
 ;
 entry:
   %.pr = load i32, i32* @b, align 4, !dbg !17, !tbaa !20

diff  --git a/llvm/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll b/llvm/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
index 3e611430d69ee..7bb2d732195a6 100644
--- a/llvm/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
+++ b/llvm/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
@@ -17,24 +17,24 @@ define void @test1(i32 %i, i32 %j) nounwind uwtable ssp {
 ; CHECK-NEXT:    [[SUB5:%.*]] = sub i32 [[SUB]], [[J:%.*]]
 ; CHECK-NEXT:    [[COND2:%.*]] = call zeroext i1 @check()
 ; CHECK-NEXT:    br i1 [[COND2]], label [[IF_THEN_LOOPEXIT:%.*]], label [[IF_ELSE_1:%.*]]
-; CHECK:       if.then.loopexit:
-; CHECK-NEXT:    [[SUB5_LCSSA:%.*]] = phi i32 [ [[SUB5]], [[IF_ELSE]] ], [ [[SUB5_1:%.*]], [[IF_ELSE_1]] ], [ [[SUB5_2:%.*]], [[IF_ELSE_2:%.*]] ], [ [[SUB5_3]], [[IF_ELSE_3]] ]
-; CHECK-NEXT:    br label [[IF_THEN]]
-; CHECK:       if.then:
-; CHECK-NEXT:    [[I_TR:%.*]] = phi i32 [ [[I]], [[ENTRY:%.*]] ], [ [[SUB5_LCSSA]], [[IF_THEN_LOOPEXIT]] ]
-; CHECK-NEXT:    ret void
 ; CHECK:       if.else.1:
-; CHECK-NEXT:    [[SUB5_1]] = sub i32 [[SUB5]], [[J]]
+; CHECK-NEXT:    [[SUB5_1:%.*]] = sub i32 [[SUB5]], [[J]]
 ; CHECK-NEXT:    [[COND2_1:%.*]] = call zeroext i1 @check()
-; CHECK-NEXT:    br i1 [[COND2_1]], label [[IF_THEN_LOOPEXIT]], label [[IF_ELSE_2]]
+; CHECK-NEXT:    br i1 [[COND2_1]], label [[IF_THEN_LOOPEXIT]], label [[IF_ELSE_2:%.*]]
 ; CHECK:       if.else.2:
-; CHECK-NEXT:    [[SUB5_2]] = sub i32 [[SUB5_1]], [[J]]
+; CHECK-NEXT:    [[SUB5_2:%.*]] = sub i32 [[SUB5_1]], [[J]]
 ; CHECK-NEXT:    [[COND2_2:%.*]] = call zeroext i1 @check()
 ; CHECK-NEXT:    br i1 [[COND2_2]], label [[IF_THEN_LOOPEXIT]], label [[IF_ELSE_3]]
 ; CHECK:       if.else.3:
 ; CHECK-NEXT:    [[SUB5_3]] = sub i32 [[SUB5_2]], [[J]]
 ; CHECK-NEXT:    [[COND2_3:%.*]] = call zeroext i1 @check()
 ; CHECK-NEXT:    br i1 [[COND2_3]], label [[IF_THEN_LOOPEXIT]], label [[IF_ELSE]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       if.then.loopexit:
+; CHECK-NEXT:    [[SUB5_LCSSA:%.*]] = phi i32 [ [[SUB5]], [[IF_ELSE]] ], [ [[SUB5_1]], [[IF_ELSE_1]] ], [ [[SUB5_2]], [[IF_ELSE_2]] ], [ [[SUB5_3]], [[IF_ELSE_3]] ]
+; CHECK-NEXT:    br label [[IF_THEN]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[I_TR:%.*]] = phi i32 [ [[I]], [[ENTRY:%.*]] ], [ [[SUB5_LCSSA]], [[IF_THEN_LOOPEXIT]] ]
+; CHECK-NEXT:    ret void
 ;
 entry:
   %cond1 = call zeroext i1 @check()
@@ -77,17 +77,11 @@ define i32 @test2(i32* nocapture %p, i32 %n) nounwind readonly {
 ; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add nuw nsw i64 [[INDVAR]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVAR_NEXT]], [[TMP]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[BB_1:%.*]], label [[BB1_BB2_CRIT_EDGE:%.*]]
-; CHECK:       bb1.bb2_crit_edge:
-; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP2]], [[BB1]] ], [ [[TMP4:%.*]], [[BB1_1:%.*]] ], [ [[TMP6:%.*]], [[BB1_2:%.*]] ], [ [[TMP8]], [[BB1_3]] ]
-; CHECK-NEXT:    br label [[BB2]]
-; CHECK:       bb2:
-; CHECK-NEXT:    [[S_0_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA]], [[BB1_BB2_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32 [[S_0_LCSSA]]
 ; CHECK:       bb.1:
 ; CHECK-NEXT:    [[SCEVGEP_1:%.*]] = getelementptr i32, i32* [[P]], i64 [[INDVAR_NEXT]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[SCEVGEP_1]], align 1
-; CHECK-NEXT:    [[TMP4]] = add nsw i32 [[TMP3]], [[TMP2]]
-; CHECK-NEXT:    br label [[BB1_1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    br label [[BB1_1:%.*]]
 ; CHECK:       bb1.1:
 ; CHECK-NEXT:    [[INDVAR_NEXT_1:%.*]] = add nuw nsw i64 [[INDVAR_NEXT]], 1
 ; CHECK-NEXT:    [[EXITCOND_1:%.*]] = icmp ne i64 [[INDVAR_NEXT_1]], [[TMP]]
@@ -95,8 +89,8 @@ define i32 @test2(i32* nocapture %p, i32 %n) nounwind readonly {
 ; CHECK:       bb.2:
 ; CHECK-NEXT:    [[SCEVGEP_2:%.*]] = getelementptr i32, i32* [[P]], i64 [[INDVAR_NEXT_1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[SCEVGEP_2]], align 1
-; CHECK-NEXT:    [[TMP6]] = add nsw i32 [[TMP5]], [[TMP4]]
-; CHECK-NEXT:    br label [[BB1_2]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    br label [[BB1_2:%.*]]
 ; CHECK:       bb1.2:
 ; CHECK-NEXT:    [[INDVAR_NEXT_2:%.*]] = add nuw nsw i64 [[INDVAR_NEXT_1]], 1
 ; CHECK-NEXT:    [[EXITCOND_2:%.*]] = icmp ne i64 [[INDVAR_NEXT_2]], [[TMP]]
@@ -110,6 +104,12 @@ define i32 @test2(i32* nocapture %p, i32 %n) nounwind readonly {
 ; CHECK-NEXT:    [[INDVAR_NEXT_3]] = add i64 [[INDVAR_NEXT_2]], 1
 ; CHECK-NEXT:    [[EXITCOND_3:%.*]] = icmp ne i64 [[INDVAR_NEXT_3]], [[TMP]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_3]], label [[BB]], label [[BB1_BB2_CRIT_EDGE]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       bb1.bb2_crit_edge:
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP2]], [[BB1]] ], [ [[TMP4]], [[BB1_1]] ], [ [[TMP6]], [[BB1_2]] ], [ [[TMP8]], [[BB1_3]] ]
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[S_0_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA]], [[BB1_BB2_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[S_0_LCSSA]]
 ;
 entry:
   %0 = icmp sgt i32 %n, 0                         ; <i1> [#uses=1]
@@ -162,20 +162,12 @@ define i32 @test3() nounwind uwtable ssp align 2 {
 ; CHECK:       do.cond:
 ; CHECK-NEXT:    [[COND3:%.*]] = call zeroext i1 @check()
 ; CHECK-NEXT:    br i1 [[COND3]], label [[DO_END:%.*]], label [[DO_BODY_1:%.*]]
-; CHECK:       do.end:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return.loopexit:
-; CHECK-NEXT:    [[TMP7_I_LCSSA:%.*]] = phi i32 [ [[TMP7_I]], [[LAND_LHS_TRUE]] ], [ [[TMP7_I_1:%.*]], [[LAND_LHS_TRUE_1:%.*]] ], [ [[TMP7_I_2:%.*]], [[LAND_LHS_TRUE_2:%.*]] ], [ [[TMP7_I_3:%.*]], [[LAND_LHS_TRUE_3:%.*]] ]
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 0, [[DO_END]] ], [ 0, [[ENTRY:%.*]] ], [ [[TMP7_I_LCSSA]], [[RETURN_LOOPEXIT]] ]
-; CHECK-NEXT:    ret i32 [[RETVAL_0]]
 ; CHECK:       do.body.1:
 ; CHECK-NEXT:    [[COND2_1:%.*]] = call zeroext i1 @check()
 ; CHECK-NEXT:    br i1 [[COND2_1]], label [[EXIT_1:%.*]], label [[DO_COND_1:%.*]]
 ; CHECK:       exit.1:
-; CHECK-NEXT:    [[TMP7_I_1]] = load i32, i32* undef, align 8
-; CHECK-NEXT:    br i1 undef, label [[DO_COND_1]], label [[LAND_LHS_TRUE_1]]
+; CHECK-NEXT:    [[TMP7_I_1:%.*]] = load i32, i32* undef, align 8
+; CHECK-NEXT:    br i1 undef, label [[DO_COND_1]], label [[LAND_LHS_TRUE_1:%.*]]
 ; CHECK:       land.lhs.true.1:
 ; CHECK-NEXT:    br i1 true, label [[RETURN_LOOPEXIT]], label [[DO_COND_1]]
 ; CHECK:       do.cond.1:
@@ -185,8 +177,8 @@ define i32 @test3() nounwind uwtable ssp align 2 {
 ; CHECK-NEXT:    [[COND2_2:%.*]] = call zeroext i1 @check()
 ; CHECK-NEXT:    br i1 [[COND2_2]], label [[EXIT_2:%.*]], label [[DO_COND_2:%.*]]
 ; CHECK:       exit.2:
-; CHECK-NEXT:    [[TMP7_I_2]] = load i32, i32* undef, align 8
-; CHECK-NEXT:    br i1 undef, label [[DO_COND_2]], label [[LAND_LHS_TRUE_2]]
+; CHECK-NEXT:    [[TMP7_I_2:%.*]] = load i32, i32* undef, align 8
+; CHECK-NEXT:    br i1 undef, label [[DO_COND_2]], label [[LAND_LHS_TRUE_2:%.*]]
 ; CHECK:       land.lhs.true.2:
 ; CHECK-NEXT:    br i1 true, label [[RETURN_LOOPEXIT]], label [[DO_COND_2]]
 ; CHECK:       do.cond.2:
@@ -196,13 +188,21 @@ define i32 @test3() nounwind uwtable ssp align 2 {
 ; CHECK-NEXT:    [[COND2_3:%.*]] = call zeroext i1 @check()
 ; CHECK-NEXT:    br i1 [[COND2_3]], label [[EXIT_3:%.*]], label [[DO_COND_3:%.*]]
 ; CHECK:       exit.3:
-; CHECK-NEXT:    [[TMP7_I_3]] = load i32, i32* undef, align 8
-; CHECK-NEXT:    br i1 undef, label [[DO_COND_3]], label [[LAND_LHS_TRUE_3]]
+; CHECK-NEXT:    [[TMP7_I_3:%.*]] = load i32, i32* undef, align 8
+; CHECK-NEXT:    br i1 undef, label [[DO_COND_3]], label [[LAND_LHS_TRUE_3:%.*]]
 ; CHECK:       land.lhs.true.3:
 ; CHECK-NEXT:    br i1 true, label [[RETURN_LOOPEXIT]], label [[DO_COND_3]]
 ; CHECK:       do.cond.3:
 ; CHECK-NEXT:    [[COND3_3:%.*]] = call zeroext i1 @check()
 ; CHECK-NEXT:    br i1 [[COND3_3]], label [[DO_END]], label [[DO_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       do.end:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return.loopexit:
+; CHECK-NEXT:    [[TMP7_I_LCSSA:%.*]] = phi i32 [ [[TMP7_I]], [[LAND_LHS_TRUE]] ], [ [[TMP7_I_1]], [[LAND_LHS_TRUE_1]] ], [ [[TMP7_I_2]], [[LAND_LHS_TRUE_2]] ], [ [[TMP7_I_3]], [[LAND_LHS_TRUE_3]] ]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 0, [[DO_END]] ], [ 0, [[ENTRY:%.*]] ], [ [[TMP7_I_LCSSA]], [[RETURN_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
 ;
 entry:
   %cond1 = call zeroext i1 @check()

diff  --git a/llvm/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll b/llvm/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll
index be4b6ff64fdde..af648bae86426 100644
--- a/llvm/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll
+++ b/llvm/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll
@@ -33,16 +33,13 @@ define i32 @foo() uwtable ssp align 2 {
 ; CHECK:       do.cond:
 ; CHECK-NEXT:    [[CMP18:%.*]] = icmp sgt i32 [[CALL2]], -1
 ; CHECK-NEXT:    br i1 [[CMP18]], label [[LAND_LHS_TRUE_I_1:%.*]], label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[TMP7_I]], [[LAND_LHS_TRUE]] ], [ 0, [[DO_COND]] ], [ [[TMP7_I_1:%.*]], [[LAND_LHS_TRUE_1:%.*]] ], [ 0, [[DO_COND_1:%.*]] ], [ [[TMP7_I_2:%.*]], [[LAND_LHS_TRUE_2:%.*]] ], [ 0, [[DO_COND_2:%.*]] ], [ [[TMP7_I_3:%.*]], [[LAND_LHS_TRUE_3:%.*]] ], [ 0, [[DO_COND_3:%.*]] ]
-; CHECK-NEXT:    ret i32 [[RETVAL_0]]
 ; CHECK:       land.lhs.true.i.1:
 ; CHECK-NEXT:    [[CMP4_I_1:%.*]] = call zeroext i1 @check() #[[ATTR0]]
-; CHECK-NEXT:    br i1 [[CMP4_I_1]], label [[BAR_EXIT_1:%.*]], label [[DO_COND_1]]
+; CHECK-NEXT:    br i1 [[CMP4_I_1]], label [[BAR_EXIT_1:%.*]], label [[DO_COND_1:%.*]]
 ; CHECK:       bar.exit.1:
-; CHECK-NEXT:    [[TMP7_I_1]] = call i32 @getval() #[[ATTR0]]
+; CHECK-NEXT:    [[TMP7_I_1:%.*]] = call i32 @getval() #[[ATTR0]]
 ; CHECK-NEXT:    [[CMP_NOT_1:%.*]] = icmp eq i32 [[TMP7_I_1]], 0
-; CHECK-NEXT:    br i1 [[CMP_NOT_1]], label [[DO_COND_1]], label [[LAND_LHS_TRUE_1]]
+; CHECK-NEXT:    br i1 [[CMP_NOT_1]], label [[DO_COND_1]], label [[LAND_LHS_TRUE_1:%.*]]
 ; CHECK:       land.lhs.true.1:
 ; CHECK-NEXT:    [[CALL10_1:%.*]] = call i32 @getval()
 ; CHECK-NEXT:    [[CMP11_1:%.*]] = icmp eq i32 [[CALL10_1]], 0
@@ -52,11 +49,11 @@ define i32 @foo() uwtable ssp align 2 {
 ; CHECK-NEXT:    br i1 [[CMP18_1]], label [[LAND_LHS_TRUE_I_2:%.*]], label [[RETURN]]
 ; CHECK:       land.lhs.true.i.2:
 ; CHECK-NEXT:    [[CMP4_I_2:%.*]] = call zeroext i1 @check() #[[ATTR0]]
-; CHECK-NEXT:    br i1 [[CMP4_I_2]], label [[BAR_EXIT_2:%.*]], label [[DO_COND_2]]
+; CHECK-NEXT:    br i1 [[CMP4_I_2]], label [[BAR_EXIT_2:%.*]], label [[DO_COND_2:%.*]]
 ; CHECK:       bar.exit.2:
-; CHECK-NEXT:    [[TMP7_I_2]] = call i32 @getval() #[[ATTR0]]
+; CHECK-NEXT:    [[TMP7_I_2:%.*]] = call i32 @getval() #[[ATTR0]]
 ; CHECK-NEXT:    [[CMP_NOT_2:%.*]] = icmp eq i32 [[TMP7_I_2]], 0
-; CHECK-NEXT:    br i1 [[CMP_NOT_2]], label [[DO_COND_2]], label [[LAND_LHS_TRUE_2]]
+; CHECK-NEXT:    br i1 [[CMP_NOT_2]], label [[DO_COND_2]], label [[LAND_LHS_TRUE_2:%.*]]
 ; CHECK:       land.lhs.true.2:
 ; CHECK-NEXT:    [[CALL10_2:%.*]] = call i32 @getval()
 ; CHECK-NEXT:    [[CMP11_2:%.*]] = icmp eq i32 [[CALL10_2]], 0
@@ -66,11 +63,11 @@ define i32 @foo() uwtable ssp align 2 {
 ; CHECK-NEXT:    br i1 [[CMP18_2]], label [[LAND_LHS_TRUE_I_3:%.*]], label [[RETURN]]
 ; CHECK:       land.lhs.true.i.3:
 ; CHECK-NEXT:    [[CMP4_I_3:%.*]] = call zeroext i1 @check() #[[ATTR0]]
-; CHECK-NEXT:    br i1 [[CMP4_I_3]], label [[BAR_EXIT_3:%.*]], label [[DO_COND_3]]
+; CHECK-NEXT:    br i1 [[CMP4_I_3]], label [[BAR_EXIT_3:%.*]], label [[DO_COND_3:%.*]]
 ; CHECK:       bar.exit.3:
-; CHECK-NEXT:    [[TMP7_I_3]] = call i32 @getval() #[[ATTR0]]
+; CHECK-NEXT:    [[TMP7_I_3:%.*]] = call i32 @getval() #[[ATTR0]]
 ; CHECK-NEXT:    [[CMP_NOT_3:%.*]] = icmp eq i32 [[TMP7_I_3]], 0
-; CHECK-NEXT:    br i1 [[CMP_NOT_3]], label [[DO_COND_3]], label [[LAND_LHS_TRUE_3]]
+; CHECK-NEXT:    br i1 [[CMP_NOT_3]], label [[DO_COND_3]], label [[LAND_LHS_TRUE_3:%.*]]
 ; CHECK:       land.lhs.true.3:
 ; CHECK-NEXT:    [[CALL10_3:%.*]] = call i32 @getval()
 ; CHECK-NEXT:    [[CMP11_3:%.*]] = icmp eq i32 [[CALL10_3]], 0
@@ -78,6 +75,9 @@ define i32 @foo() uwtable ssp align 2 {
 ; CHECK:       do.cond.3:
 ; CHECK-NEXT:    [[CMP18_3:%.*]] = icmp sgt i32 [[CALL2]], -1
 ; CHECK-NEXT:    br i1 [[CMP18_3]], label [[LAND_LHS_TRUE_I]], label [[RETURN]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[TMP7_I]], [[LAND_LHS_TRUE]] ], [ 0, [[DO_COND]] ], [ [[TMP7_I_1]], [[LAND_LHS_TRUE_1]] ], [ 0, [[DO_COND_1]] ], [ [[TMP7_I_2]], [[LAND_LHS_TRUE_2]] ], [ 0, [[DO_COND_2]] ], [ [[TMP7_I_3]], [[LAND_LHS_TRUE_3]] ], [ 0, [[DO_COND_3]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
 ;
 entry:
   br i1 undef, label %return, label %if.end

diff  --git a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
index 5bbab929c9364..5c8f9ca016795 100644
--- a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
@@ -67,8 +67,6 @@ define void @runtime_unroll_generic(i32 %arg_0, i32* %arg_1, i16* %arg_2, i16* %
 ; CHECK-A55-NEXT:    store i32 [[ADD21_EPIL]], i32* [[ARRAYIDX20]], align 4
 ; CHECK-A55-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i32 [[XTRAITER]], 1
 ; CHECK-A55-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL_1:%.*]]
-; CHECK-A55:       for.end:
-; CHECK-A55-NEXT:    ret void
 ; CHECK-A55:       for.body6.epil.1:
 ; CHECK-A55-NEXT:    [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
 ; CHECK-A55-NEXT:    [[CONV_EPIL_1:%.*]] = sext i16 [[TMP14]] to i32
@@ -90,6 +88,8 @@ define void @runtime_unroll_generic(i32 %arg_0, i32* %arg_1, i16* %arg_2, i16* %
 ; CHECK-A55-NEXT:    [[ADD21_EPIL_2:%.*]] = add nsw i32 [[MUL16_EPIL_2]], [[TMP19]]
 ; CHECK-A55-NEXT:    store i32 [[ADD21_EPIL_2]], i32* [[ARRAYIDX20]], align 4
 ; CHECK-A55-NEXT:    br label [[FOR_END]]
+; CHECK-A55:       for.end:
+; CHECK-A55-NEXT:    ret void
 ;
 ; CHECK-GENERIC-LABEL: @runtime_unroll_generic(
 ; CHECK-GENERIC-NEXT:  entry:

diff  --git a/llvm/test/Transforms/LoopUnroll/AArch64/thresholdO3-cost-model.ll b/llvm/test/Transforms/LoopUnroll/AArch64/thresholdO3-cost-model.ll
index ee07518f8cac4..5c6ac690c0caa 100644
--- a/llvm/test/Transforms/LoopUnroll/AArch64/thresholdO3-cost-model.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/thresholdO3-cost-model.ll
@@ -21,10 +21,6 @@ define i32 @tripcount_11() {
 ; CHECK-NEXT:    br label [[DO_BODY6:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    br i1 true, label [[FOR_COND_1:%.*]], label [[IF_THEN11:%.*]]
-; CHECK:       do.body6:
-; CHECK-NEXT:    br i1 true, label [[FOR_COND:%.*]], label [[IF_THEN11]]
-; CHECK:       if.then11:
-; CHECK-NEXT:    unreachable
 ; CHECK:       for.cond.1:
 ; CHECK-NEXT:    br i1 true, label [[FOR_COND_2:%.*]], label [[IF_THEN11]]
 ; CHECK:       for.cond.2:
@@ -45,6 +41,10 @@ define i32 @tripcount_11() {
 ; CHECK-NEXT:    br i1 true, label [[FOR_COND_10:%.*]], label [[IF_THEN11]]
 ; CHECK:       for.cond.10:
 ; CHECK-NEXT:    ret i32 0
+; CHECK:       do.body6:
+; CHECK-NEXT:    br i1 true, label [[FOR_COND:%.*]], label [[IF_THEN11]]
+; CHECK:       if.then11:
+; CHECK-NEXT:    unreachable
 ;
 do.body6.preheader:
   br label %do.body6

diff  --git a/llvm/test/Transforms/LoopUnroll/AArch64/unroll-upperbound.ll b/llvm/test/Transforms/LoopUnroll/AArch64/unroll-upperbound.ll
index 3b82365d1a6ef..ee905e5b10fe8 100644
--- a/llvm/test/Transforms/LoopUnroll/AArch64/unroll-upperbound.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/unroll-upperbound.ll
@@ -18,8 +18,6 @@ define void @test(i1 %cond) {
 ; CHECK-NEXT:    br label [[LATCH]]
 ; CHECK:       latch:
 ; CHECK-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[FOR_BODY_1:%.*]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
 ; CHECK:       for.body.1:
 ; CHECK-NEXT:    switch i32 1, label [[SW_DEFAULT_1:%.*]] [
 ; CHECK-NEXT:    i32 2, label [[LATCH_1:%.*]]
@@ -38,6 +36,8 @@ define void @test(i1 %cond) {
 ; CHECK-NEXT:    br label [[LATCH_2]]
 ; CHECK:       latch.2:
 ; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %0 = select i1 %cond, i32 2, i32 3

diff  --git a/llvm/test/Transforms/LoopUnroll/ARM/loop-unrolling.ll b/llvm/test/Transforms/LoopUnroll/ARM/loop-unrolling.ll
index f2e748ade0a2d..e12dbf031b3b9 100644
--- a/llvm/test/Transforms/LoopUnroll/ARM/loop-unrolling.ll
+++ b/llvm/test/Transforms/LoopUnroll/ARM/loop-unrolling.ll
@@ -121,14 +121,14 @@ for.body4:
 ; CHECK-NOUNROLL: br
 
 ; CHECK-UNROLL: for.body4.epil:
+; CHECK-UNROLL: for.body4.epil.1:
+; CHECK-UNROLL: for.body4.epil.2:
 ; CHECK-UNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z0-9.]+]] ], [ [[IV4:%[a-z.0-9]+]], %for.body4 ]
 ; CHECK-UNROLL: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
 ; CHECK-UNROLL: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV1]], 1
 ; CHECK-UNROLL: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV2]], 1
 ; CHECK-UNROLL: [[IV4]] = add nuw i32 [[IV3]], 1
 ; CHECK-UNROLL: br
-; CHECK-UNROLL: for.body4.epil.1:
-; CHECK-UNROLL: for.body4.epil.2:
 
   %w.024 = phi i32 [ 0, %for.body4.lr.ph ], [ %inc, %for.body4 ]
   %add = add i32 %w.024, %mul

diff  --git a/llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll b/llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll
index 156c0ab106587..8c4257698ab7c 100644
--- a/llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll
+++ b/llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll
@@ -45,8 +45,37 @@ define void @test_three_blocks(i32* nocapture %Output,
 ; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
 ; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_BODY_EPIL_1:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA:%.*]]
+; CHECK:       for.body.epil.1:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4
+; CHECK-NEXT:    [[TOBOOL_EPIL_1:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_EPIL_1]], label [[FOR_INC_EPIL_1:%.*]], label [[IF_THEN_EPIL_1:%.*]]
+; CHECK:       if.then.epil.1:
+; CHECK-NEXT:    [[ARRAYIDX1_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_EPIL]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX1_EPIL_1]], align 4
+; CHECK-NEXT:    [[ADD_EPIL_1:%.*]] = add i32 [[TMP5]], [[TEMP_1_EPIL]]
+; CHECK-NEXT:    br label [[FOR_INC_EPIL_1]]
+; CHECK:       for.inc.epil.1:
+; CHECK-NEXT:    [[TEMP_1_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], [[IF_THEN_EPIL_1]] ], [ [[TEMP_1_EPIL]], [[FOR_BODY_EPIL_1]] ]
+; CHECK-NEXT:    [[INC_EPIL_1:%.*]] = add nuw i32 [[INC_EPIL]], 1
+; CHECK-NEXT:    [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1
+; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label [[FOR_BODY_EPIL_2:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK:       for.body.epil.2:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL_1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4
+; CHECK-NEXT:    [[TOBOOL_EPIL_2:%.*]] = icmp eq i32 [[TMP6]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_EPIL_2]], label [[FOR_INC_EPIL_2:%.*]], label [[IF_THEN_EPIL_2:%.*]]
+; CHECK:       if.then.epil.2:
+; CHECK-NEXT:    [[ARRAYIDX1_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_EPIL_1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX1_EPIL_2]], align 4
+; CHECK-NEXT:    [[ADD_EPIL_2:%.*]] = add i32 [[TMP7]], [[TEMP_1_EPIL_1]]
+; CHECK-NEXT:    br label [[FOR_INC_EPIL_2]]
+; CHECK:       for.inc.epil.2:
+; CHECK-NEXT:    [[TEMP_1_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], [[IF_THEN_EPIL_2]] ], [ [[TEMP_1_EPIL_1]], [[FOR_BODY_EPIL_2]] ]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
 ; CHECK:       for.cond.cleanup.loopexit.epilog-lcssa:
-; CHECK-NEXT:    [[TEMP_1_LCSSA_PH1:%.*]] = phi i32 [ [[TEMP_1_EPIL]], [[FOR_INC_EPIL]] ], [ [[TEMP_1_EPIL_1:%.*]], [[FOR_INC_EPIL_1:%.*]] ], [ [[TEMP_1_EPIL_2:%.*]], [[FOR_INC_EPIL_2:%.*]] ]
+; CHECK-NEXT:    [[TEMP_1_LCSSA_PH1:%.*]] = phi i32 [ [[TEMP_1_EPIL]], [[FOR_INC_EPIL]] ], [ [[TEMP_1_EPIL_1]], [[FOR_INC_EPIL_1]] ], [ [[TEMP_1_EPIL_2]], [[FOR_INC_EPIL_2]] ]
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT]]
 ; CHECK:       for.cond.cleanup.loopexit:
 ; CHECK-NEXT:    [[TEMP_1_LCSSA:%.*]] = phi i32 [ [[TEMP_1_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[TEMP_1_LCSSA_PH1]], [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ]
@@ -60,51 +89,22 @@ define void @test_three_blocks(i32* nocapture %Output,
 ; CHECK-NEXT:    [[TEMP_09:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[TEMP_1_3]], [[FOR_INC_3]] ]
 ; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_INC_3]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[J_010]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP8]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[FOR_INC:%.*]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[J_010]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP5]], [[TEMP_09]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP9]], [[TEMP_09]]
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[TEMP_1:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[TEMP_09]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i32 [[J_010]], 1
 ; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
-; CHECK-NEXT:    [[TOBOOL_1:%.*]] = icmp eq i32 [[TMP6]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[TOBOOL_1:%.*]] = icmp eq i32 [[TMP10]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_1]], label [[FOR_INC_1:%.*]], label [[IF_THEN_1:%.*]]
-; CHECK:       for.body.epil.1:
-; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4
-; CHECK-NEXT:    [[TOBOOL_EPIL_1:%.*]] = icmp eq i32 [[TMP7]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL_EPIL_1]], label [[FOR_INC_EPIL_1]], label [[IF_THEN_EPIL_1:%.*]]
-; CHECK:       if.then.epil.1:
-; CHECK-NEXT:    [[ARRAYIDX1_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_EPIL]]
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX1_EPIL_1]], align 4
-; CHECK-NEXT:    [[ADD_EPIL_1:%.*]] = add i32 [[TMP8]], [[TEMP_1_EPIL]]
-; CHECK-NEXT:    br label [[FOR_INC_EPIL_1]]
-; CHECK:       for.inc.epil.1:
-; CHECK-NEXT:    [[TEMP_1_EPIL_1]] = phi i32 [ [[ADD_EPIL_1]], [[IF_THEN_EPIL_1]] ], [ [[TEMP_1_EPIL]], [[FOR_BODY_EPIL_1]] ]
-; CHECK-NEXT:    [[INC_EPIL_1:%.*]] = add nuw i32 [[INC_EPIL]], 1
-; CHECK-NEXT:    [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0
-; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label [[FOR_BODY_EPIL_2:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
-; CHECK:       for.body.epil.2:
-; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL_1]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4
-; CHECK-NEXT:    [[TOBOOL_EPIL_2:%.*]] = icmp eq i32 [[TMP9]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL_EPIL_2]], label [[FOR_INC_EPIL_2]], label [[IF_THEN_EPIL_2:%.*]]
-; CHECK:       if.then.epil.2:
-; CHECK-NEXT:    [[ARRAYIDX1_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_EPIL_1]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX1_EPIL_2]], align 4
-; CHECK-NEXT:    [[ADD_EPIL_2:%.*]] = add i32 [[TMP10]], [[TEMP_1_EPIL_1]]
-; CHECK-NEXT:    br label [[FOR_INC_EPIL_2]]
-; CHECK:       for.inc.epil.2:
-; CHECK-NEXT:    [[TEMP_1_EPIL_2]] = phi i32 [ [[ADD_EPIL_2]], [[IF_THEN_EPIL_2]] ], [ [[TEMP_1_EPIL_1]], [[FOR_BODY_EPIL_2]] ]
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
 ; CHECK:       if.then.1:
 ; CHECK-NEXT:    [[ARRAYIDX1_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX1_1]], align 4
@@ -203,41 +203,34 @@ define void @test_two_exits(i32* nocapture %Output,
 ; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i32 [[J_016]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[INC]], [[MAXJ]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_1:%.*]], label [[CLEANUP_LOOPEXIT]]
-; CHECK:       cleanup.loopexit:
-; CHECK-NEXT:    [[TEMP_0_LCSSA_PH:%.*]] = phi i32 [ [[TEMP_0_ADD]], [[IF_END]] ], [ [[TEMP_015]], [[FOR_BODY]] ], [ [[TEMP_0_ADD]], [[FOR_BODY_1]] ], [ [[TEMP_0_ADD_1:%.*]], [[IF_END_1:%.*]] ], [ [[TEMP_0_ADD_1]], [[FOR_BODY_2:%.*]] ], [ [[TEMP_0_ADD_2:%.*]], [[IF_END_2:%.*]] ], [ [[TEMP_0_ADD_2]], [[FOR_BODY_3:%.*]] ], [ [[TEMP_0_ADD_3]], [[IF_END_3]] ]
-; CHECK-NEXT:    br label [[CLEANUP]]
-; CHECK:       cleanup:
-; CHECK-NEXT:    [[TEMP_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TEMP_0_LCSSA_PH]], [[CLEANUP_LOOPEXIT]] ]
-; CHECK-NEXT:    store i32 [[TEMP_0_LCSSA]], i32* [[OUTPUT:%.*]], align 4
-; CHECK-NEXT:    ret void
 ; CHECK:       for.body.1:
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
 ; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp ugt i32 [[TMP2]], 65535
-; CHECK-NEXT:    br i1 [[CMP1_1]], label [[CLEANUP_LOOPEXIT]], label [[IF_END_1]]
+; CHECK-NEXT:    br i1 [[CMP1_1]], label [[CLEANUP_LOOPEXIT]], label [[IF_END_1:%.*]]
 ; CHECK:       if.end.1:
 ; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
 ; CHECK-NEXT:    [[TOBOOL_1:%.*]] = icmp eq i32 [[TMP3]], 0
 ; CHECK-NEXT:    [[ADD_1:%.*]] = select i1 [[TOBOOL_1]], i32 0, i32 [[TMP2]]
-; CHECK-NEXT:    [[TEMP_0_ADD_1]] = add i32 [[ADD_1]], [[TEMP_0_ADD]]
+; CHECK-NEXT:    [[TEMP_0_ADD_1:%.*]] = add i32 [[ADD_1]], [[TEMP_0_ADD]]
 ; CHECK-NEXT:    [[INC_1:%.*]] = add nuw nsw i32 [[INC]], 1
 ; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ult i32 [[INC_1]], [[MAXJ]]
-; CHECK-NEXT:    br i1 [[CMP_1]], label [[FOR_BODY_2]], label [[CLEANUP_LOOPEXIT]]
+; CHECK-NEXT:    br i1 [[CMP_1]], label [[FOR_BODY_2:%.*]], label [[CLEANUP_LOOPEXIT]]
 ; CHECK:       for.body.2:
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
 ; CHECK-NEXT:    [[CMP1_2:%.*]] = icmp ugt i32 [[TMP4]], 65535
-; CHECK-NEXT:    br i1 [[CMP1_2]], label [[CLEANUP_LOOPEXIT]], label [[IF_END_2]]
+; CHECK-NEXT:    br i1 [[CMP1_2]], label [[CLEANUP_LOOPEXIT]], label [[IF_END_2:%.*]]
 ; CHECK:       if.end.2:
 ; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4
 ; CHECK-NEXT:    [[TOBOOL_2:%.*]] = icmp eq i32 [[TMP5]], 0
 ; CHECK-NEXT:    [[ADD_2:%.*]] = select i1 [[TOBOOL_2]], i32 0, i32 [[TMP4]]
-; CHECK-NEXT:    [[TEMP_0_ADD_2]] = add i32 [[ADD_2]], [[TEMP_0_ADD_1]]
+; CHECK-NEXT:    [[TEMP_0_ADD_2:%.*]] = add i32 [[ADD_2]], [[TEMP_0_ADD_1]]
 ; CHECK-NEXT:    [[INC_2:%.*]] = add nuw nsw i32 [[INC_1]], 1
 ; CHECK-NEXT:    [[CMP_2:%.*]] = icmp ult i32 [[INC_2]], [[MAXJ]]
-; CHECK-NEXT:    br i1 [[CMP_2]], label [[FOR_BODY_3]], label [[CLEANUP_LOOPEXIT]]
+; CHECK-NEXT:    br i1 [[CMP_2]], label [[FOR_BODY_3:%.*]], label [[CLEANUP_LOOPEXIT]]
 ; CHECK:       for.body.3:
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_2]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
@@ -252,6 +245,13 @@ define void @test_two_exits(i32* nocapture %Output,
 ; CHECK-NEXT:    [[INC_3]] = add nuw i32 [[INC_2]], 1
 ; CHECK-NEXT:    [[CMP_3:%.*]] = icmp ult i32 [[INC_3]], [[MAXJ]]
 ; CHECK-NEXT:    br i1 [[CMP_3]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT]]
+; CHECK:       cleanup.loopexit:
+; CHECK-NEXT:    [[TEMP_0_LCSSA_PH:%.*]] = phi i32 [ [[TEMP_0_ADD]], [[IF_END]] ], [ [[TEMP_015]], [[FOR_BODY]] ], [ [[TEMP_0_ADD]], [[FOR_BODY_1]] ], [ [[TEMP_0_ADD_1]], [[IF_END_1]] ], [ [[TEMP_0_ADD_1]], [[FOR_BODY_2]] ], [ [[TEMP_0_ADD_2]], [[IF_END_2]] ], [ [[TEMP_0_ADD_2]], [[FOR_BODY_3]] ], [ [[TEMP_0_ADD_3]], [[IF_END_3]] ]
+; CHECK-NEXT:    br label [[CLEANUP]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    [[TEMP_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TEMP_0_LCSSA_PH]], [[CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    store i32 [[TEMP_0_LCSSA]], i32* [[OUTPUT:%.*]], align 4
+; CHECK-NEXT:    ret void
 ;
   i32* nocapture readonly %Condition,
   i32* nocapture readonly %Input,
@@ -417,100 +417,100 @@ define void @test_four_blocks(i32* nocapture %Output,
 ; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
 ; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_BODY_EPIL_1:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA:%.*]]
-; CHECK:       for.cond.cleanup.loopexit.epilog-lcssa:
-; CHECK-NEXT:    [[TEMP_1_LCSSA_PH1:%.*]] = phi i32 [ [[TEMP_1_EPIL]], [[FOR_INC_EPIL]] ], [ [[TEMP_1_EPIL_1:%.*]], [[FOR_INC_EPIL_1:%.*]] ], [ [[TEMP_1_EPIL_2:%.*]], [[FOR_INC_EPIL_2:%.*]] ]
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT]]
-; CHECK:       for.cond.cleanup.loopexit:
-; CHECK-NEXT:    [[TEMP_1_LCSSA:%.*]] = phi i32 [ [[TEMP_1_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[TEMP_1_LCSSA_PH1]], [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ]
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
-; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    [[TEMP_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TEMP_1_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
-; CHECK-NEXT:    store i32 [[TEMP_0_LCSSA]], i32* [[OUTPUT:%.*]], align 4
-; CHECK-NEXT:    ret void
-; CHECK:       for.body:
-; CHECK-NEXT:    [[TMP6:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_BODY_LR_PH_NEW]] ], [ [[TMP23]], [[FOR_INC_3]] ]
-; CHECK-NEXT:    [[J_027:%.*]] = phi i32 [ 1, [[FOR_BODY_LR_PH_NEW]] ], [ [[INC_3]], [[FOR_INC_3]] ]
-; CHECK-NEXT:    [[TEMP_026:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[TEMP_1_3]], [[FOR_INC_3]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_INC_3]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[J_027]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[TMP7]], 65535
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[J_027]]
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[CMP4:%.*]] = icmp ugt i32 [[TMP8]], [[TMP6]]
-; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    [[COND:%.*]] = zext i1 [[CMP4]] to i32
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TEMP_026]], [[COND]]
-; CHECK-NEXT:    br label [[FOR_INC:%.*]]
-; CHECK:       if.else:
-; CHECK-NEXT:    [[NOT_CMP4:%.*]] = xor i1 [[CMP4]], true
-; CHECK-NEXT:    [[SUB:%.*]] = sext i1 [[NOT_CMP4]] to i32
-; CHECK-NEXT:    [[SUB10_SINK:%.*]] = add i32 [[J_027]], [[SUB]]
-; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[SUB10_SINK]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX11]], align 4
-; CHECK-NEXT:    [[SUB13:%.*]] = sub i32 [[TEMP_026]], [[TMP9]]
-; CHECK-NEXT:    br label [[FOR_INC]]
-; CHECK:       for.inc:
-; CHECK-NEXT:    [[TEMP_1:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[SUB13]], [[IF_ELSE]] ]
-; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i32 [[J_027]], 1
-; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
-; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp ugt i32 [[TMP10]], 65535
-; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
-; CHECK-NEXT:    [[CMP4_1:%.*]] = icmp ugt i32 [[TMP11]], [[TMP8]]
-; CHECK-NEXT:    br i1 [[CMP1_1]], label [[IF_THEN_1:%.*]], label [[IF_ELSE_1:%.*]]
 ; CHECK:       for.body.epil.1:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL]]
-; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4
-; CHECK-NEXT:    [[CMP1_EPIL_1:%.*]] = icmp ugt i32 [[TMP12]], 65535
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4
+; CHECK-NEXT:    [[CMP1_EPIL_1:%.*]] = icmp ugt i32 [[TMP6]], 65535
 ; CHECK-NEXT:    [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_EPIL]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_1]], align 4
-; CHECK-NEXT:    [[CMP4_EPIL_1:%.*]] = icmp ugt i32 [[TMP13]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_1]], align 4
+; CHECK-NEXT:    [[CMP4_EPIL_1:%.*]] = icmp ugt i32 [[TMP7]], [[TMP4]]
 ; CHECK-NEXT:    br i1 [[CMP1_EPIL_1]], label [[IF_THEN_EPIL_1:%.*]], label [[IF_ELSE_EPIL_1:%.*]]
 ; CHECK:       if.else.epil.1:
 ; CHECK-NEXT:    [[NOT_CMP4_EPIL_1:%.*]] = xor i1 [[CMP4_EPIL_1]], true
 ; CHECK-NEXT:    [[SUB_EPIL_1:%.*]] = sext i1 [[NOT_CMP4_EPIL_1]] to i32
 ; CHECK-NEXT:    [[SUB10_SINK_EPIL_1:%.*]] = add i32 [[INC_EPIL]], [[SUB_EPIL_1]]
 ; CHECK-NEXT:    [[ARRAYIDX11_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[SUB10_SINK_EPIL_1]]
-; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX11_EPIL_1]], align 4
-; CHECK-NEXT:    [[SUB13_EPIL_1:%.*]] = sub i32 [[TEMP_1_EPIL]], [[TMP14]]
-; CHECK-NEXT:    br label [[FOR_INC_EPIL_1]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX11_EPIL_1]], align 4
+; CHECK-NEXT:    [[SUB13_EPIL_1:%.*]] = sub i32 [[TEMP_1_EPIL]], [[TMP8]]
+; CHECK-NEXT:    br label [[FOR_INC_EPIL_1:%.*]]
 ; CHECK:       if.then.epil.1:
 ; CHECK-NEXT:    [[COND_EPIL_1:%.*]] = zext i1 [[CMP4_EPIL_1]] to i32
 ; CHECK-NEXT:    [[ADD_EPIL_1:%.*]] = add i32 [[TEMP_1_EPIL]], [[COND_EPIL_1]]
 ; CHECK-NEXT:    br label [[FOR_INC_EPIL_1]]
 ; CHECK:       for.inc.epil.1:
-; CHECK-NEXT:    [[TEMP_1_EPIL_1]] = phi i32 [ [[ADD_EPIL_1]], [[IF_THEN_EPIL_1]] ], [ [[SUB13_EPIL_1]], [[IF_ELSE_EPIL_1]] ]
+; CHECK-NEXT:    [[TEMP_1_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], [[IF_THEN_EPIL_1]] ], [ [[SUB13_EPIL_1]], [[IF_ELSE_EPIL_1]] ]
 ; CHECK-NEXT:    [[INC_EPIL_1:%.*]] = add nuw i32 [[INC_EPIL]], 1
 ; CHECK-NEXT:    [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1
 ; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label [[FOR_BODY_EPIL_2:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
 ; CHECK:       for.body.epil.2:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL_1]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4
-; CHECK-NEXT:    [[CMP1_EPIL_2:%.*]] = icmp ugt i32 [[TMP15]], 65535
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4
+; CHECK-NEXT:    [[CMP1_EPIL_2:%.*]] = icmp ugt i32 [[TMP9]], 65535
 ; CHECK-NEXT:    [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_EPIL_1]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_2]], align 4
-; CHECK-NEXT:    [[CMP4_EPIL_2:%.*]] = icmp ugt i32 [[TMP16]], [[TMP13]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_2]], align 4
+; CHECK-NEXT:    [[CMP4_EPIL_2:%.*]] = icmp ugt i32 [[TMP10]], [[TMP7]]
 ; CHECK-NEXT:    br i1 [[CMP1_EPIL_2]], label [[IF_THEN_EPIL_2:%.*]], label [[IF_ELSE_EPIL_2:%.*]]
 ; CHECK:       if.else.epil.2:
 ; CHECK-NEXT:    [[NOT_CMP4_EPIL_2:%.*]] = xor i1 [[CMP4_EPIL_2]], true
 ; CHECK-NEXT:    [[SUB_EPIL_2:%.*]] = sext i1 [[NOT_CMP4_EPIL_2]] to i32
 ; CHECK-NEXT:    [[SUB10_SINK_EPIL_2:%.*]] = add i32 [[INC_EPIL_1]], [[SUB_EPIL_2]]
 ; CHECK-NEXT:    [[ARRAYIDX11_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[SUB10_SINK_EPIL_2]]
-; CHECK-NEXT:    [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX11_EPIL_2]], align 4
-; CHECK-NEXT:    [[SUB13_EPIL_2:%.*]] = sub i32 [[TEMP_1_EPIL_1]], [[TMP17]]
-; CHECK-NEXT:    br label [[FOR_INC_EPIL_2]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX11_EPIL_2]], align 4
+; CHECK-NEXT:    [[SUB13_EPIL_2:%.*]] = sub i32 [[TEMP_1_EPIL_1]], [[TMP11]]
+; CHECK-NEXT:    br label [[FOR_INC_EPIL_2:%.*]]
 ; CHECK:       if.then.epil.2:
 ; CHECK-NEXT:    [[COND_EPIL_2:%.*]] = zext i1 [[CMP4_EPIL_2]] to i32
 ; CHECK-NEXT:    [[ADD_EPIL_2:%.*]] = add i32 [[TEMP_1_EPIL_1]], [[COND_EPIL_2]]
 ; CHECK-NEXT:    br label [[FOR_INC_EPIL_2]]
 ; CHECK:       for.inc.epil.2:
-; CHECK-NEXT:    [[TEMP_1_EPIL_2]] = phi i32 [ [[ADD_EPIL_2]], [[IF_THEN_EPIL_2]] ], [ [[SUB13_EPIL_2]], [[IF_ELSE_EPIL_2]] ]
+; CHECK-NEXT:    [[TEMP_1_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], [[IF_THEN_EPIL_2]] ], [ [[SUB13_EPIL_2]], [[IF_ELSE_EPIL_2]] ]
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK:       for.cond.cleanup.loopexit.epilog-lcssa:
+; CHECK-NEXT:    [[TEMP_1_LCSSA_PH1:%.*]] = phi i32 [ [[TEMP_1_EPIL]], [[FOR_INC_EPIL]] ], [ [[TEMP_1_EPIL_1]], [[FOR_INC_EPIL_1]] ], [ [[TEMP_1_EPIL_2]], [[FOR_INC_EPIL_2]] ]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[TEMP_1_LCSSA:%.*]] = phi i32 [ [[TEMP_1_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[TEMP_1_LCSSA_PH1]], [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[TEMP_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TEMP_1_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    store i32 [[TEMP_0_LCSSA]], i32* [[OUTPUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[TMP12:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_BODY_LR_PH_NEW]] ], [ [[TMP23]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    [[J_027:%.*]] = phi i32 [ 1, [[FOR_BODY_LR_PH_NEW]] ], [ [[INC_3]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    [[TEMP_026:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[TEMP_1_3]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[J_027]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[TMP13]], 65535
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[J_027]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[CMP4:%.*]] = icmp ugt i32 [[TMP14]], [[TMP12]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[COND:%.*]] = zext i1 [[CMP4]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TEMP_026]], [[COND]]
+; CHECK-NEXT:    br label [[FOR_INC:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[NOT_CMP4:%.*]] = xor i1 [[CMP4]], true
+; CHECK-NEXT:    [[SUB:%.*]] = sext i1 [[NOT_CMP4]] to i32
+; CHECK-NEXT:    [[SUB10_SINK:%.*]] = add i32 [[J_027]], [[SUB]]
+; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[SUB10_SINK]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX11]], align 4
+; CHECK-NEXT:    [[SUB13:%.*]] = sub i32 [[TEMP_026]], [[TMP15]]
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[TEMP_1:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[SUB13]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i32 [[J_027]], 1
+; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp ugt i32 [[TMP16]], 65535
+; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT:    [[CMP4_1:%.*]] = icmp ugt i32 [[TMP17]], [[TMP14]]
+; CHECK-NEXT:    br i1 [[CMP1_1]], label [[IF_THEN_1:%.*]], label [[IF_ELSE_1:%.*]]
 ; CHECK:       if.else.1:
 ; CHECK-NEXT:    [[NOT_CMP4_1:%.*]] = xor i1 [[CMP4_1]], true
 ; CHECK-NEXT:    [[SUB_1:%.*]] = sext i1 [[NOT_CMP4_1]] to i32
@@ -532,7 +532,7 @@ define void @test_four_blocks(i32* nocapture %Output,
 ; CHECK-NEXT:    [[CMP1_2:%.*]] = icmp ugt i32 [[TMP19]], 65535
 ; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_1]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4
-; CHECK-NEXT:    [[CMP4_2:%.*]] = icmp ugt i32 [[TMP20]], [[TMP11]]
+; CHECK-NEXT:    [[CMP4_2:%.*]] = icmp ugt i32 [[TMP20]], [[TMP17]]
 ; CHECK-NEXT:    br i1 [[CMP1_2]], label [[IF_THEN_2:%.*]], label [[IF_ELSE_2:%.*]]
 ; CHECK:       if.else.2:
 ; CHECK-NEXT:    [[NOT_CMP4_2:%.*]] = xor i1 [[CMP4_2]], true
@@ -742,10 +742,6 @@ define void @iterate_inc(%struct.Node* %n, i32 %limit) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load %struct.Node*, %struct.Node** [[TMP1]], align 4
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq %struct.Node* [[TMP2]], null
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT]], label [[LAND_RHS_1:%.*]]
-; CHECK:       while.end.loopexit:
-; CHECK-NEXT:    br label [[WHILE_END]]
-; CHECK:       while.end:
-; CHECK-NEXT:    ret void
 ; CHECK:       land.rhs.1:
 ; CHECK-NEXT:    [[VAL_1:%.*]] = getelementptr inbounds [[STRUCT_NODE]], %struct.Node* [[TMP2]], i32 0, i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[VAL_1]], align 4
@@ -782,6 +778,10 @@ define void @iterate_inc(%struct.Node* %n, i32 %limit) {
 ; CHECK-NEXT:    [[TMP11]] = load %struct.Node*, %struct.Node** [[TMP10]], align 4
 ; CHECK-NEXT:    [[TOBOOL_3:%.*]] = icmp eq %struct.Node* [[TMP11]], null
 ; CHECK-NEXT:    br i1 [[TOBOOL_3]], label [[WHILE_END_LOOPEXIT]], label [[LAND_RHS]]
+; CHECK:       while.end.loopexit:
+; CHECK-NEXT:    br label [[WHILE_END]]
+; CHECK:       while.end:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %tobool5 = icmp eq %struct.Node* %n, null

diff  --git a/llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll b/llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll
index ea18d3aa1054c..33151c68b3198 100644
--- a/llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll
+++ b/llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll
@@ -20,8 +20,6 @@ define void @test(i32* %x, i32 %n) {
 ; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[REM]], 1
 ; CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_BODY_1:%.*]], label [[WHILE_END]]
-; CHECK:       while.end:
-; CHECK-NEXT:    ret void
 ; CHECK:       while.body.1:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
 ; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp slt i32 [[TMP1]], 10
@@ -40,6 +38,8 @@ define void @test(i32* %x, i32 %n) {
 ; CHECK:       if.then.2:
 ; CHECK-NEXT:    store i32 0, i32* [[INCDEC_PTR_1]], align 4
 ; CHECK-NEXT:    br label [[WHILE_END]]
+; CHECK:       while.end:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %sub = add nsw i32 %n, -1
@@ -76,9 +76,9 @@ define i32 @test2(i32 %l86) {
 ; CHECK-NEXT:    [[L86_OFF:%.*]] = add i32 [[L86:%.*]], -1
 ; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[L86_OFF]], 24
 ; CHECK-NEXT:    [[DOTNOT30:%.*]] = icmp ne i32 [[L86]], 25
-; CHECK-NEXT:    [[SPEC_SELECT24:%.*]] = zext i1 [[DOTNOT30]] to i32
-; CHECK-NEXT:    [[COMMON_RET31_OP:%.*]] = select i1 [[SWITCH]], i32 0, i32 [[SPEC_SELECT24]]
-; CHECK-NEXT:    ret i32 [[COMMON_RET31_OP]]
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = zext i1 [[DOTNOT30]] to i32
+; CHECK-NEXT:    [[COMMON_RET_OP:%.*]] = select i1 [[SWITCH]], i32 0, i32 [[SPEC_SELECT]]
+; CHECK-NEXT:    ret i32 [[COMMON_RET_OP]]
 ;
 entry:
   br label %for.body.i.i

diff  --git a/llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
index 3160517155844..cdc8e944715e5 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
@@ -15,12 +15,12 @@ define void @s32_max1(i32 %n, i32* %p) {
 ; CHECK-NEXT:    [[INC:%.*]] = add i32 [[N]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[N]], [[ADD]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY_1:%.*]], label [[DO_END:%.*]]
-; CHECK:       do.end:
-; CHECK-NEXT:    ret void
 ; CHECK:       do.body.1:
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC]]
 ; CHECK-NEXT:    store i32 [[INC]], i32* [[ARRAYIDX_1]], align 4
 ; CHECK-NEXT:    br label [[DO_END]]
+; CHECK:       do.end:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %add = add i32 %n, 1
@@ -51,8 +51,6 @@ define void @s32_max2(i32 %n, i32* %p) {
 ; CHECK-NEXT:    [[INC:%.*]] = add i32 [[N]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[N]], [[ADD]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY_1:%.*]], label [[DO_END:%.*]]
-; CHECK:       do.end:
-; CHECK-NEXT:    ret void
 ; CHECK:       do.body.1:
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC]]
 ; CHECK-NEXT:    store i32 [[INC]], i32* [[ARRAYIDX_1]], align 4
@@ -60,6 +58,8 @@ define void @s32_max2(i32 %n, i32* %p) {
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC_1]]
 ; CHECK-NEXT:    store i32 [[INC_1]], i32* [[ARRAYIDX_2]], align 4
 ; CHECK-NEXT:    br label [[DO_END]]
+; CHECK:       do.end:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %add = add i32 %n, 2
@@ -163,12 +163,12 @@ define void @u32_max1(i32 %n, i32* %p) {
 ; CHECK-NEXT:    [[INC:%.*]] = add i32 [[N]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[N]], [[ADD]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY_1:%.*]], label [[DO_END:%.*]]
-; CHECK:       do.end:
-; CHECK-NEXT:    ret void
 ; CHECK:       do.body.1:
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC]]
 ; CHECK-NEXT:    store i32 [[INC]], i32* [[ARRAYIDX_1]], align 4
 ; CHECK-NEXT:    br label [[DO_END]]
+; CHECK:       do.end:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %add = add i32 %n, 1
@@ -199,8 +199,6 @@ define void @u32_max2(i32 %n, i32* %p) {
 ; CHECK-NEXT:    [[INC:%.*]] = add i32 [[N]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[N]], [[ADD]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY_1:%.*]], label [[DO_END:%.*]]
-; CHECK:       do.end:
-; CHECK-NEXT:    ret void
 ; CHECK:       do.body.1:
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC]]
 ; CHECK-NEXT:    store i32 [[INC]], i32* [[ARRAYIDX_1]], align 4
@@ -208,6 +206,8 @@ define void @u32_max2(i32 %n, i32* %p) {
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC_1]]
 ; CHECK-NEXT:    store i32 [[INC_1]], i32* [[ARRAYIDX_2]], align 4
 ; CHECK-NEXT:    br label [[DO_END]]
+; CHECK:       do.end:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %add = add i32 %n, 2

diff  --git a/llvm/test/Transforms/LoopUnroll/full-unroll-one-unpredictable-exit.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-one-unpredictable-exit.ll
index 095a7c1e1dd1a..b7d7e00fa0c9b 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-one-unpredictable-exit.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-one-unpredictable-exit.ll
@@ -34,11 +34,11 @@ define i1 @test_latch() {
 ; CHECK-NEXT:    [[LOAD2_1:%.*]] = load i64, i64* [[GEP2_1]], align 8
 ; CHECK-NEXT:    [[EXITCOND2_1:%.*]] = icmp eq i64 [[LOAD1_1]], [[LOAD2_1]]
 ; CHECK-NEXT:    br i1 [[EXITCOND2_1]], label [[LATCH_1:%.*]], label [[EXIT]]
+; CHECK:       latch.1:
+; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[EXIT_VAL:%.*]] = phi i1 [ false, [[LOOP]] ], [ false, [[LATCH]] ], [ true, [[LATCH_1]] ]
 ; CHECK-NEXT:    ret i1 [[EXIT_VAL]]
-; CHECK:       latch.1:
-; CHECK-NEXT:    br label [[EXIT]]
 ;
 start:
   %a1 = alloca [2 x i64], align 8
@@ -95,22 +95,22 @@ define i1 @test_non_latch() {
 ; CHECK-NEXT:    [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 8
 ; CHECK-NEXT:    [[EXITCOND2:%.*]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
 ; CHECK-NEXT:    br i1 [[EXITCOND2]], label [[LOOP_1:%.*]], label [[EXIT:%.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ false, [[LATCH_1:%.*]] ], [ true, [[LOOP_2:%.*]] ], [ false, [[LATCH_2:%.*]] ]
-; CHECK-NEXT:    ret i1 [[EXIT_VAL]]
 ; CHECK:       loop.1:
-; CHECK-NEXT:    br label [[LATCH_1]]
+; CHECK-NEXT:    br label [[LATCH_1:%.*]]
 ; CHECK:       latch.1:
 ; CHECK-NEXT:    [[GEP1_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 1
 ; CHECK-NEXT:    [[GEP2_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 1
 ; CHECK-NEXT:    [[LOAD1_1:%.*]] = load i64, i64* [[GEP1_1]], align 8
 ; CHECK-NEXT:    [[LOAD2_1:%.*]] = load i64, i64* [[GEP2_1]], align 8
 ; CHECK-NEXT:    [[EXITCOND2_1:%.*]] = icmp eq i64 [[LOAD1_1]], [[LOAD2_1]]
-; CHECK-NEXT:    br i1 [[EXITCOND2_1]], label [[LOOP_2]], label [[EXIT]]
+; CHECK-NEXT:    br i1 [[EXITCOND2_1]], label [[LOOP_2:%.*]], label [[EXIT]]
 ; CHECK:       loop.2:
-; CHECK-NEXT:    br i1 true, label [[EXIT]], label [[LATCH_2]]
+; CHECK-NEXT:    br i1 true, label [[EXIT]], label [[LATCH_2:%.*]]
 ; CHECK:       latch.2:
 ; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ false, [[LATCH_1]] ], [ true, [[LOOP_2]] ], [ false, [[LATCH_2]] ]
+; CHECK-NEXT:    ret i1 [[EXIT_VAL]]
 ;
 start:
   %a1 = alloca [2 x i64], align 8

diff  --git a/llvm/test/Transforms/LoopUnroll/multiple-exits.ll b/llvm/test/Transforms/LoopUnroll/multiple-exits.ll
index 0bea86350b999..9f40f51c10e63 100644
--- a/llvm/test/Transforms/LoopUnroll/multiple-exits.ll
+++ b/llvm/test/Transforms/LoopUnroll/multiple-exits.ll
@@ -14,8 +14,6 @@ define void @test1() {
 ; CHECK-NEXT:    call void @bar()
 ; CHECK-NEXT:    call void @bar()
 ; CHECK-NEXT:    br label [[LATCH_1:%.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
 ; CHECK:       latch.1:
 ; CHECK-NEXT:    call void @bar()
 ; CHECK-NEXT:    call void @bar()
@@ -55,6 +53,8 @@ define void @test1() {
 ; CHECK:       latch.10:
 ; CHECK-NEXT:    call void @bar()
 ; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop
@@ -84,8 +84,6 @@ define void @test2(i64 %N) {
 ; CHECK:       latch:
 ; CHECK-NEXT:    call void @bar()
 ; CHECK-NEXT:    br i1 true, label [[LOOP_1:%.*]], label [[EXIT:%.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
 ; CHECK:       loop.1:
 ; CHECK-NEXT:    call void @bar()
 ; CHECK-NEXT:    br label [[LATCH_1:%.*]]
@@ -162,6 +160,8 @@ define void @test2(i64 %N) {
 ; CHECK:       latch.11:
 ; CHECK-NEXT:    call void @bar()
 ; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop

diff  --git a/llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll b/llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll
index 1cd86ec145df8..eca86b4cd9c36 100644
--- a/llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll
+++ b/llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll
@@ -21,8 +21,6 @@ define void @test1(i32* noalias %A) {
 ; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
 ; CHECK-NEXT:    call void @bar(i32 [[DOTPRE]])
 ; CHECK-NEXT:    br label [[FOR_BODY_1:%.*]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
 ; CHECK:       for.body.1:
 ; CHECK-NEXT:    br label [[FOR_BODY_FOR_BODY_CRIT_EDGE_1:%.*]]
 ; CHECK:       for.body.for.body_crit_edge.1:
@@ -41,6 +39,8 @@ define void @test1(i32* noalias %A) {
 ; CHECK-NEXT:    br i1 false, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_3:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body.for.body_crit_edge.3:
 ; CHECK-NEXT:    unreachable
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %0 = load i32, i32* %A, align 4
@@ -95,10 +95,6 @@ define void @test2(i32* noalias %A) {
 ; CHECK-NEXT:    call void @bar(i32 [[DOTPRE]])
 ; CHECK-NEXT:    [[INC_1:%.*]] = add nuw nsw i64 [[INC]], 1
 ; CHECK-NEXT:    br i1 true, label [[FOR_BODY_1:%.*]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE_1:%.*]]
-; CHECK:       for.end.loopexit:
-; CHECK-NEXT:    br label [[FOR_END]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
 ; CHECK:       for.body.1:
 ; CHECK-NEXT:    [[CMP_1:%.*]] = call i1 @foo(i64 [[INC]])
 ; CHECK-NEXT:    br i1 [[CMP_1]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE_1]], label [[FOR_END_LOOPEXIT]]
@@ -124,6 +120,10 @@ define void @test2(i32* noalias %A) {
 ; CHECK-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_3]]
 ; CHECK-NEXT:    [[DOTPRE_3]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_3]], align 4
 ; CHECK-NEXT:    br label [[FOR_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
 ;
 entry:
   br i1 true, label %for.preheader, label %for.end
@@ -174,8 +174,6 @@ define void @test3(i32* noalias %A, i1 %cond) {
 ; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
 ; CHECK-NEXT:    call void @bar(i32 [[DOTPRE]])
 ; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY_1:%.*]], label [[FOR_END]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
 ; CHECK:       for.body.1:
 ; CHECK-NEXT:    br label [[FOR_BODY_FOR_BODY_CRIT_EDGE_1:%.*]]
 ; CHECK:       for.body.for.body_crit_edge.1:
@@ -194,6 +192,8 @@ define void @test3(i32* noalias %A, i1 %cond) {
 ; CHECK-NEXT:    br i1 false, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_3:%.*]], label [[FOR_END]]
 ; CHECK:       for.body.for.body_crit_edge.3:
 ; CHECK-NEXT:    unreachable
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %0 = load i32, i32* %A, align 4
@@ -228,14 +228,14 @@ define void @test4(i32 %arg) {
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    br i1 false, label [[BB4:%.*]], label [[BB1_1:%.*]]
-; CHECK:       bb4:
-; CHECK-NEXT:    unreachable
 ; CHECK:       bb1.1:
 ; CHECK-NEXT:    br i1 false, label [[BB4]], label [[BB1_2:%.*]]
 ; CHECK:       bb1.2:
 ; CHECK-NEXT:    br i1 false, label [[BB4]], label [[BB1_3:%.*]]
 ; CHECK:       bb1.3:
 ; CHECK-NEXT:    br i1 false, label [[BB4]], label [[BB1]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       bb4:
+; CHECK-NEXT:    unreachable
 ;
 bb:
   br label %bb1

diff  --git a/llvm/test/Transforms/LoopUnroll/partial-unroll-non-latch-exit.ll b/llvm/test/Transforms/LoopUnroll/partial-unroll-non-latch-exit.ll
index a695938034d5b..a71248cc6e68c 100644
--- a/llvm/test/Transforms/LoopUnroll/partial-unroll-non-latch-exit.ll
+++ b/llvm/test/Transforms/LoopUnroll/partial-unroll-non-latch-exit.ll
@@ -20,11 +20,8 @@ define i1 @test(i64* %a1, i64* %a2) {
 ; CHECK-NEXT:    [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 8
 ; CHECK-NEXT:    [[EXITCOND2:%.*]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
 ; CHECK-NEXT:    br i1 [[EXITCOND2]], label [[LOOP_1:%.*]], label [[EXIT:%.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ false, [[LATCH_1:%.*]] ], [ false, [[LATCH_2:%.*]] ], [ false, [[LATCH_3:%.*]] ], [ true, [[LOOP_4:%.*]] ], [ false, [[LATCH_4]] ]
-; CHECK-NEXT:    ret i1 [[EXIT_VAL]]
 ; CHECK:       loop.1:
-; CHECK-NEXT:    br label [[LATCH_1]]
+; CHECK-NEXT:    br label [[LATCH_1:%.*]]
 ; CHECK:       latch.1:
 ; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV_NEXT]], 1
 ; CHECK-NEXT:    [[GEP1_1:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT]]
@@ -34,7 +31,7 @@ define i1 @test(i64* %a1, i64* %a2) {
 ; CHECK-NEXT:    [[EXITCOND2_1:%.*]] = icmp eq i64 [[LOAD1_1]], [[LOAD2_1]]
 ; CHECK-NEXT:    br i1 [[EXITCOND2_1]], label [[LOOP_2:%.*]], label [[EXIT]]
 ; CHECK:       loop.2:
-; CHECK-NEXT:    br label [[LATCH_2]]
+; CHECK-NEXT:    br label [[LATCH_2:%.*]]
 ; CHECK:       latch.2:
 ; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV_NEXT_1]], 1
 ; CHECK-NEXT:    [[GEP1_2:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT_1]]
@@ -44,7 +41,7 @@ define i1 @test(i64* %a1, i64* %a2) {
 ; CHECK-NEXT:    [[EXITCOND2_2:%.*]] = icmp eq i64 [[LOAD1_2]], [[LOAD2_2]]
 ; CHECK-NEXT:    br i1 [[EXITCOND2_2]], label [[LOOP_3:%.*]], label [[EXIT]]
 ; CHECK:       loop.3:
-; CHECK-NEXT:    br label [[LATCH_3]]
+; CHECK-NEXT:    br label [[LATCH_3:%.*]]
 ; CHECK:       latch.3:
 ; CHECK-NEXT:    [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV_NEXT_2]], 1
 ; CHECK-NEXT:    [[GEP1_3:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT_2]]
@@ -52,7 +49,7 @@ define i1 @test(i64* %a1, i64* %a2) {
 ; CHECK-NEXT:    [[LOAD1_3:%.*]] = load i64, i64* [[GEP1_3]], align 8
 ; CHECK-NEXT:    [[LOAD2_3:%.*]] = load i64, i64* [[GEP2_3]], align 8
 ; CHECK-NEXT:    [[EXITCOND2_3:%.*]] = icmp eq i64 [[LOAD1_3]], [[LOAD2_3]]
-; CHECK-NEXT:    br i1 [[EXITCOND2_3]], label [[LOOP_4]], label [[EXIT]]
+; CHECK-NEXT:    br i1 [[EXITCOND2_3]], label [[LOOP_4:%.*]], label [[EXIT]]
 ; CHECK:       loop.4:
 ; CHECK-NEXT:    [[EXITCOND_4:%.*]] = icmp eq i64 [[IV_NEXT_3]], 24
 ; CHECK-NEXT:    br i1 [[EXITCOND_4]], label [[EXIT]], label [[LATCH_4]]
@@ -64,6 +61,9 @@ define i1 @test(i64* %a1, i64* %a2) {
 ; CHECK-NEXT:    [[LOAD2_4:%.*]] = load i64, i64* [[GEP2_4]], align 8
 ; CHECK-NEXT:    [[EXITCOND2_4:%.*]] = icmp eq i64 [[LOAD1_4]], [[LOAD2_4]]
 ; CHECK-NEXT:    br i1 [[EXITCOND2_4]], label [[LOOP]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ false, [[LATCH_1]] ], [ false, [[LATCH_2]] ], [ false, [[LATCH_3]] ], [ true, [[LOOP_4]] ], [ false, [[LATCH_4]] ]
+; CHECK-NEXT:    ret i1 [[EXIT_VAL]]
 ;
 start:
   br label %loop

diff  --git a/llvm/test/Transforms/LoopUnroll/partially-unroll-unconditional-latch.ll b/llvm/test/Transforms/LoopUnroll/partially-unroll-unconditional-latch.ll
index 10f6a31445b72..be5de5c2ea736 100644
--- a/llvm/test/Transforms/LoopUnroll/partially-unroll-unconditional-latch.ll
+++ b/llvm/test/Transforms/LoopUnroll/partially-unroll-unconditional-latch.ll
@@ -23,8 +23,6 @@ define i32 @test_partial_unroll_with_breakout_at_iter0() {
 ; CHECK-NEXT:    [[PTR_1:%.*]] = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 [[IV_NEXT_1]]
 ; CHECK-NEXT:    store i32 [[RED_NEXT_1]], i32* [[PTR_1]], align 4
 ; CHECK-NEXT:    br label [[FOR_LATCH_1:%.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret i32 0
 ; CHECK:       for.latch.1:
 ; CHECK-NEXT:    [[RED_NEXT_2:%.*]] = add nuw nsw i32 10, [[RED_NEXT_1]]
 ; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV_NEXT_1]], 2
@@ -40,6 +38,8 @@ define i32 @test_partial_unroll_with_breakout_at_iter0() {
 ; CHECK-NEXT:    br i1 [[EXITCOND_1_I_3]], label [[EXIT:%.*]], label [[FOR_LATCH_3]]
 ; CHECK:       for.latch.3:
 ; CHECK-NEXT:    br label [[FOR_HEADER]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
 ;
 entry:
   br label %for.header

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll
index 6f35d752d6b94..b9e81c4adefa1 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll
@@ -26,13 +26,69 @@ define i32 @test(i32* nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[NITER_NSUB:%.*]] = sub i64 [[NITER]], 1
 ; ENABLED-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[N]], 42
 ; ENABLED-NEXT:    br i1 [[CMP_1]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_1:%.*]]
+; ENABLED:       for.body.1:
+; ENABLED-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
+; ENABLED-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; ENABLED-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[TMP3]], [[ADD]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
+; ENABLED-NEXT:    [[NITER_NSUB_1:%.*]] = sub i64 [[NITER_NSUB]], 1
+; ENABLED-NEXT:    [[CMP_2:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_2]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_2:%.*]]
+; ENABLED:       for.body.2:
+; ENABLED-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; ENABLED-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; ENABLED-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[TMP4]], [[ADD_1]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
+; ENABLED-NEXT:    [[NITER_NSUB_2:%.*]] = sub i64 [[NITER_NSUB_1]], 1
+; ENABLED-NEXT:    [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_3]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_3:%.*]]
+; ENABLED:       for.body.3:
+; ENABLED-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; ENABLED-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; ENABLED-NEXT:    [[ADD_3:%.*]] = add nsw i32 [[TMP5]], [[ADD_2]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
+; ENABLED-NEXT:    [[NITER_NSUB_3:%.*]] = sub i64 [[NITER_NSUB_2]], 1
+; ENABLED-NEXT:    [[CMP_4:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_4]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_4:%.*]]
+; ENABLED:       for.body.4:
+; ENABLED-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; ENABLED-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
+; ENABLED-NEXT:    [[ADD_4:%.*]] = add nsw i32 [[TMP6]], [[ADD_3]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
+; ENABLED-NEXT:    [[NITER_NSUB_4:%.*]] = sub i64 [[NITER_NSUB_3]], 1
+; ENABLED-NEXT:    [[CMP_5:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_5]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_5:%.*]]
+; ENABLED:       for.body.5:
+; ENABLED-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]]
+; ENABLED-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
+; ENABLED-NEXT:    [[ADD_5:%.*]] = add nsw i32 [[TMP7]], [[ADD_4]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
+; ENABLED-NEXT:    [[NITER_NSUB_5:%.*]] = sub i64 [[NITER_NSUB_4]], 1
+; ENABLED-NEXT:    [[CMP_6:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_6]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_6:%.*]]
+; ENABLED:       for.body.6:
+; ENABLED-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]]
+; ENABLED-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
+; ENABLED-NEXT:    [[ADD_6:%.*]] = add nsw i32 [[TMP8]], [[ADD_5]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
+; ENABLED-NEXT:    [[NITER_NSUB_6:%.*]] = sub i64 [[NITER_NSUB_5]], 1
+; ENABLED-NEXT:    [[CMP_7:%.*]] = icmp eq i64 [[N]], 42
+; ENABLED-NEXT:    br i1 [[CMP_7]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_7]]
+; ENABLED:       for.body.7:
+; ENABLED-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_6]]
+; ENABLED-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
+; ENABLED-NEXT:    [[ADD_7]] = add nsw i32 [[TMP9]], [[ADD_6]]
+; ENABLED-NEXT:    [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV_NEXT_6]], 1
+; ENABLED-NEXT:    [[NITER_NSUB_7]] = sub i64 [[NITER_NSUB_6]], 1
+; ENABLED-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0
+; ENABLED-NEXT:    br i1 [[NITER_NCMP_7]], label [[FOR_END_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
 ; ENABLED:       for.end.unr-lcssa.loopexit:
 ; ENABLED-NEXT:    [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[FOR_BODY_7]] ]
 ; ENABLED-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[FOR_BODY_7]] ]
 ; ENABLED-NEXT:    [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[FOR_BODY_7]] ]
 ; ENABLED-NEXT:    br label [[FOR_END_UNR_LCSSA]]
 ; ENABLED:       for.end.unr-lcssa:
-; ENABLED-NEXT:    [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[SUM_0_LCSSA_PH_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT:%.*]] ]
+; ENABLED-NEXT:    [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[SUM_0_LCSSA_PH_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
 ; ENABLED-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
 ; ENABLED-NEXT:    [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
 ; ENABLED-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
@@ -47,8 +103,8 @@ define i32 @test(i32* nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    br i1 [[CMP_EPIL]], label [[FOR_EXIT2_LOOPEXIT2:%.*]], label [[FOR_BODY_EPIL]]
 ; ENABLED:       for.body.epil:
 ; ENABLED-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_EPIL]]
-; ENABLED-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4
-; ENABLED-NEXT:    [[ADD_EPIL]] = add nsw i32 [[TMP3]], [[SUM_02_EPIL]]
+; ENABLED-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4
+; ENABLED-NEXT:    [[ADD_EPIL]] = add nsw i32 [[TMP10]], [[SUM_02_EPIL]]
 ; ENABLED-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
 ; ENABLED-NEXT:    [[EXITCOND_EPIL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N]]
 ; ENABLED-NEXT:    [[EPIL_ITER_SUB]] = sub i64 [[EPIL_ITER]], 1
@@ -61,7 +117,7 @@ define i32 @test(i32* nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[FOR_END_UNR_LCSSA]] ], [ [[SUM_0_LCSSA_PH1]], [[FOR_END_EPILOG_LCSSA]] ]
 ; ENABLED-NEXT:    ret i32 [[SUM_0_LCSSA]]
 ; ENABLED:       for.exit2.loopexit:
-; ENABLED-NEXT:    [[RETVAL_PH:%.*]] = phi i32 [ [[SUM_02]], [[HEADER]] ], [ [[ADD]], [[FOR_BODY]] ], [ [[ADD_1:%.*]], [[FOR_BODY_1]] ], [ [[ADD_2:%.*]], [[FOR_BODY_2:%.*]] ], [ [[ADD_3:%.*]], [[FOR_BODY_3:%.*]] ], [ [[ADD_4:%.*]], [[FOR_BODY_4:%.*]] ], [ [[ADD_5:%.*]], [[FOR_BODY_5:%.*]] ], [ [[ADD_6:%.*]], [[FOR_BODY_6:%.*]] ]
+; ENABLED-NEXT:    [[RETVAL_PH:%.*]] = phi i32 [ [[SUM_02]], [[HEADER]] ], [ [[ADD]], [[FOR_BODY]] ], [ [[ADD_1]], [[FOR_BODY_1]] ], [ [[ADD_2]], [[FOR_BODY_2]] ], [ [[ADD_3]], [[FOR_BODY_3]] ], [ [[ADD_4]], [[FOR_BODY_4]] ], [ [[ADD_5]], [[FOR_BODY_5]] ], [ [[ADD_6]], [[FOR_BODY_6]] ]
 ; ENABLED-NEXT:    br label [[FOR_EXIT2:%.*]]
 ; ENABLED:       for.exit2.loopexit2:
 ; ENABLED-NEXT:    [[RETVAL_PH3:%.*]] = phi i32 [ [[SUM_02_EPIL]], [[HEADER_EPIL]] ]
@@ -69,62 +125,6 @@ define i32 @test(i32* nocapture %a, i64 %n) {
 ; ENABLED:       for.exit2:
 ; ENABLED-NEXT:    [[RETVAL:%.*]] = phi i32 [ [[RETVAL_PH]], [[FOR_EXIT2_LOOPEXIT]] ], [ [[RETVAL_PH3]], [[FOR_EXIT2_LOOPEXIT2]] ]
 ; ENABLED-NEXT:    ret i32 [[RETVAL]]
-; ENABLED:       for.body.1:
-; ENABLED-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
-; ENABLED-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
-; ENABLED-NEXT:    [[ADD_1]] = add nsw i32 [[TMP4]], [[ADD]]
-; ENABLED-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
-; ENABLED-NEXT:    [[NITER_NSUB_1:%.*]] = sub i64 [[NITER_NSUB]], 1
-; ENABLED-NEXT:    [[CMP_2:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT:    br i1 [[CMP_2]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_2]]
-; ENABLED:       for.body.2:
-; ENABLED-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]]
-; ENABLED-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
-; ENABLED-NEXT:    [[ADD_2]] = add nsw i32 [[TMP5]], [[ADD_1]]
-; ENABLED-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
-; ENABLED-NEXT:    [[NITER_NSUB_2:%.*]] = sub i64 [[NITER_NSUB_1]], 1
-; ENABLED-NEXT:    [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT:    br i1 [[CMP_3]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_3]]
-; ENABLED:       for.body.3:
-; ENABLED-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]]
-; ENABLED-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
-; ENABLED-NEXT:    [[ADD_3]] = add nsw i32 [[TMP6]], [[ADD_2]]
-; ENABLED-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
-; ENABLED-NEXT:    [[NITER_NSUB_3:%.*]] = sub i64 [[NITER_NSUB_2]], 1
-; ENABLED-NEXT:    [[CMP_4:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT:    br i1 [[CMP_4]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_4]]
-; ENABLED:       for.body.4:
-; ENABLED-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]]
-; ENABLED-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
-; ENABLED-NEXT:    [[ADD_4]] = add nsw i32 [[TMP7]], [[ADD_3]]
-; ENABLED-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
-; ENABLED-NEXT:    [[NITER_NSUB_4:%.*]] = sub i64 [[NITER_NSUB_3]], 1
-; ENABLED-NEXT:    [[CMP_5:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT:    br i1 [[CMP_5]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_5]]
-; ENABLED:       for.body.5:
-; ENABLED-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]]
-; ENABLED-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
-; ENABLED-NEXT:    [[ADD_5]] = add nsw i32 [[TMP8]], [[ADD_4]]
-; ENABLED-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
-; ENABLED-NEXT:    [[NITER_NSUB_5:%.*]] = sub i64 [[NITER_NSUB_4]], 1
-; ENABLED-NEXT:    [[CMP_6:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT:    br i1 [[CMP_6]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_6]]
-; ENABLED:       for.body.6:
-; ENABLED-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]]
-; ENABLED-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
-; ENABLED-NEXT:    [[ADD_6]] = add nsw i32 [[TMP9]], [[ADD_5]]
-; ENABLED-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
-; ENABLED-NEXT:    [[NITER_NSUB_6:%.*]] = sub i64 [[NITER_NSUB_5]], 1
-; ENABLED-NEXT:    [[CMP_7:%.*]] = icmp eq i64 [[N]], 42
-; ENABLED-NEXT:    br i1 [[CMP_7]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_7]]
-; ENABLED:       for.body.7:
-; ENABLED-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_6]]
-; ENABLED-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
-; ENABLED-NEXT:    [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
-; ENABLED-NEXT:    [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV_NEXT_6]], 1
-; ENABLED-NEXT:    [[NITER_NSUB_7]] = sub i64 [[NITER_NSUB_6]], 1
-; ENABLED-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0
-; ENABLED-NEXT:    br i1 [[NITER_NCMP_7]], label [[FOR_END_UNR_LCSSA_LOOPEXIT]], label [[HEADER]]
 ;
 ; DISABLED-LABEL: @test(
 ; DISABLED-NEXT:  entry:

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
index 9110650a93342..472c550a2278a 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
@@ -24,39 +24,39 @@ define i64 @test1() {
 ; CHECK-NEXT:    [[SHFT:%.*]] = ashr i64 [[ADD_IV]], 1
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i64 [[SHFT]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[HEADER_1:%.*]], label [[LATCHEXIT:%.*]]
-; CHECK:       headerexit:
-; CHECK-NEXT:    [[ADDPHI:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER]] ], [ [[ADD_IV_1:%.*]], [[HEADER_1]] ], [ [[ADD_IV_2:%.*]], [[HEADER_2:%.*]] ], [ [[ADD_IV_3]], [[HEADER_3:%.*]] ]
-; CHECK-NEXT:    br label [[MERGEDEXIT:%.*]]
-; CHECK:       latchexit:
-; CHECK-NEXT:    [[SHFTPHI:%.*]] = phi i64 [ [[SHFT]], [[LATCH]] ], [ [[SHFT_1:%.*]], [[LATCH_1:%.*]] ], [ [[SHFT_2:%.*]], [[LATCH_2:%.*]] ], [ [[SHFT_3:%.*]], [[LATCH_3]] ]
-; CHECK-NEXT:    br label [[MERGEDEXIT]]
-; CHECK:       mergedexit:
-; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[ADDPHI]], [[HEADEREXIT]] ], [ [[SHFTPHI]], [[LATCHEXIT]] ]
-; CHECK-NEXT:    ret i64 [[RETVAL]]
 ; CHECK:       header.1:
-; CHECK-NEXT:    [[ADD_IV_1]] = add nuw nsw i64 [[ADD_IV]], 2
+; CHECK-NEXT:    [[ADD_IV_1:%.*]] = add nuw nsw i64 [[ADD_IV]], 2
 ; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp ult i64 [[ADD_IV_1]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1_1]], label [[LATCH_1]], label [[HEADEREXIT]]
+; CHECK-NEXT:    br i1 [[CMP1_1]], label [[LATCH_1:%.*]], label [[HEADEREXIT]]
 ; CHECK:       latch.1:
-; CHECK-NEXT:    [[SHFT_1]] = ashr i64 [[ADD_IV_1]], 1
+; CHECK-NEXT:    [[SHFT_1:%.*]] = ashr i64 [[ADD_IV_1]], 1
 ; CHECK-NEXT:    [[CMP2_1:%.*]] = icmp ult i64 [[SHFT_1]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP2_1]], label [[HEADER_2]], label [[LATCHEXIT]]
+; CHECK-NEXT:    br i1 [[CMP2_1]], label [[HEADER_2:%.*]], label [[LATCHEXIT]]
 ; CHECK:       header.2:
-; CHECK-NEXT:    [[ADD_IV_2]] = add nuw nsw i64 [[ADD_IV_1]], 2
+; CHECK-NEXT:    [[ADD_IV_2:%.*]] = add nuw nsw i64 [[ADD_IV_1]], 2
 ; CHECK-NEXT:    [[CMP1_2:%.*]] = icmp ult i64 [[ADD_IV_2]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1_2]], label [[LATCH_2]], label [[HEADEREXIT]]
+; CHECK-NEXT:    br i1 [[CMP1_2]], label [[LATCH_2:%.*]], label [[HEADEREXIT]]
 ; CHECK:       latch.2:
-; CHECK-NEXT:    [[SHFT_2]] = ashr i64 [[ADD_IV_2]], 1
+; CHECK-NEXT:    [[SHFT_2:%.*]] = ashr i64 [[ADD_IV_2]], 1
 ; CHECK-NEXT:    [[CMP2_2:%.*]] = icmp ult i64 [[SHFT_2]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP2_2]], label [[HEADER_3]], label [[LATCHEXIT]]
+; CHECK-NEXT:    br i1 [[CMP2_2]], label [[HEADER_3:%.*]], label [[LATCHEXIT]]
 ; CHECK:       header.3:
 ; CHECK-NEXT:    [[ADD_IV_3]] = add nuw nsw i64 [[ADD_IV_2]], 2
 ; CHECK-NEXT:    [[CMP1_3:%.*]] = icmp ult i64 [[ADD_IV_3]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP1_3]], label [[LATCH_3]], label [[HEADEREXIT]]
 ; CHECK:       latch.3:
-; CHECK-NEXT:    [[SHFT_3]] = ashr i64 [[ADD_IV_3]], 1
+; CHECK-NEXT:    [[SHFT_3:%.*]] = ashr i64 [[ADD_IV_3]], 1
 ; CHECK-NEXT:    [[CMP2_3:%.*]] = icmp ult i64 [[SHFT_3]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2_3]], label [[HEADER]], label [[LATCHEXIT]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       headerexit:
+; CHECK-NEXT:    [[ADDPHI:%.*]] = phi i64 [ [[ADD_IV]], [[HEADER]] ], [ [[ADD_IV_1]], [[HEADER_1]] ], [ [[ADD_IV_2]], [[HEADER_2]] ], [ [[ADD_IV_3]], [[HEADER_3]] ]
+; CHECK-NEXT:    br label [[MERGEDEXIT:%.*]]
+; CHECK:       latchexit:
+; CHECK-NEXT:    [[SHFTPHI:%.*]] = phi i64 [ [[SHFT]], [[LATCH]] ], [ [[SHFT_1]], [[LATCH_1]] ], [ [[SHFT_2]], [[LATCH_2]] ], [ [[SHFT_3]], [[LATCH_3]] ]
+; CHECK-NEXT:    br label [[MERGEDEXIT]]
+; CHECK:       mergedexit:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[ADDPHI]], [[HEADEREXIT]] ], [ [[SHFTPHI]], [[LATCHEXIT]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
 ;
 entry:
   br label %preheader
@@ -106,12 +106,6 @@ define  void @test2(i1 %cond, i32 %n) {
 ; CHECK-NEXT:    [[SHFT:%.*]] = ashr i64 [[ADD_IV]], 1
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i64 [[SHFT]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[HEADER_1:%.*]], label [[LATCHEXIT:%.*]]
-; CHECK:       headerexit:
-; CHECK-NEXT:    br label [[MERGEDEXIT]]
-; CHECK:       latchexit:
-; CHECK-NEXT:    br label [[MERGEDEXIT]]
-; CHECK:       mergedexit:
-; CHECK-NEXT:    ret void
 ; CHECK:       header.1:
 ; CHECK-NEXT:    [[ADD_IV_1:%.*]] = add nuw nsw i64 [[ADD_IV]], 2
 ; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp ult i64 [[ADD_IV_1]], [[TRIP]]
@@ -136,6 +130,12 @@ define  void @test2(i1 %cond, i32 %n) {
 ; CHECK-NEXT:    [[SHFT_3:%.*]] = ashr i64 [[ADD_IV_3]], 1
 ; CHECK-NEXT:    [[CMP2_3:%.*]] = icmp ult i64 [[SHFT_3]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2_3]], label [[HEADER]], label [[LATCHEXIT]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       headerexit:
+; CHECK-NEXT:    br label [[MERGEDEXIT]]
+; CHECK:       latchexit:
+; CHECK-NEXT:    br label [[MERGEDEXIT]]
+; CHECK:       mergedexit:
+; CHECK-NEXT:    ret void
 ;
 entry:
   br i1 %cond, label %preheader, label %mergedexit
@@ -183,27 +183,20 @@ define i64 @test3(i32 %n) {
 ; CHECK-NEXT:    [[SHFT:%.*]] = ashr i64 [[ADD_IV]], 1
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i64 [[SHFT]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[HEADER_1:%.*]], label [[LATCHEXIT:%.*]]
-; CHECK:       headerexit:
-; CHECK-NEXT:    br label [[EXITSUCC:%.*]]
-; CHECK:       latchexit:
-; CHECK-NEXT:    [[SHFTPHI:%.*]] = phi i64 [ [[SHFT]], [[LATCH]] ], [ [[SHFT_1:%.*]], [[LATCH_1:%.*]] ], [ [[SHFT_2:%.*]], [[LATCH_2:%.*]] ], [ [[SHFT_3:%.*]], [[LATCH_3]] ]
-; CHECK-NEXT:    ret i64 [[SHFTPHI]]
-; CHECK:       exitsucc:
-; CHECK-NEXT:    ret i64 96
 ; CHECK:       header.1:
 ; CHECK-NEXT:    [[ADD_IV_1:%.*]] = add nuw nsw i64 [[ADD_IV]], 2
 ; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp ult i64 [[ADD_IV_1]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1_1]], label [[LATCH_1]], label [[HEADEREXIT]]
+; CHECK-NEXT:    br i1 [[CMP1_1]], label [[LATCH_1:%.*]], label [[HEADEREXIT]]
 ; CHECK:       latch.1:
-; CHECK-NEXT:    [[SHFT_1]] = ashr i64 [[ADD_IV_1]], 1
+; CHECK-NEXT:    [[SHFT_1:%.*]] = ashr i64 [[ADD_IV_1]], 1
 ; CHECK-NEXT:    [[CMP2_1:%.*]] = icmp ult i64 [[SHFT_1]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2_1]], label [[HEADER_2:%.*]], label [[LATCHEXIT]]
 ; CHECK:       header.2:
 ; CHECK-NEXT:    [[ADD_IV_2:%.*]] = add nuw nsw i64 [[ADD_IV_1]], 2
 ; CHECK-NEXT:    [[CMP1_2:%.*]] = icmp ult i64 [[ADD_IV_2]], [[TRIP]]
-; CHECK-NEXT:    br i1 [[CMP1_2]], label [[LATCH_2]], label [[HEADEREXIT]]
+; CHECK-NEXT:    br i1 [[CMP1_2]], label [[LATCH_2:%.*]], label [[HEADEREXIT]]
 ; CHECK:       latch.2:
-; CHECK-NEXT:    [[SHFT_2]] = ashr i64 [[ADD_IV_2]], 1
+; CHECK-NEXT:    [[SHFT_2:%.*]] = ashr i64 [[ADD_IV_2]], 1
 ; CHECK-NEXT:    [[CMP2_2:%.*]] = icmp ult i64 [[SHFT_2]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2_2]], label [[HEADER_3:%.*]], label [[LATCHEXIT]]
 ; CHECK:       header.3:
@@ -211,9 +204,16 @@ define i64 @test3(i32 %n) {
 ; CHECK-NEXT:    [[CMP1_3:%.*]] = icmp ult i64 [[ADD_IV_3]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP1_3]], label [[LATCH_3]], label [[HEADEREXIT]]
 ; CHECK:       latch.3:
-; CHECK-NEXT:    [[SHFT_3]] = ashr i64 [[ADD_IV_3]], 1
+; CHECK-NEXT:    [[SHFT_3:%.*]] = ashr i64 [[ADD_IV_3]], 1
 ; CHECK-NEXT:    [[CMP2_3:%.*]] = icmp ult i64 [[SHFT_3]], [[TRIP]]
 ; CHECK-NEXT:    br i1 [[CMP2_3]], label [[HEADER]], label [[LATCHEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       headerexit:
+; CHECK-NEXT:    br label [[EXITSUCC:%.*]]
+; CHECK:       latchexit:
+; CHECK-NEXT:    [[SHFTPHI:%.*]] = phi i64 [ [[SHFT]], [[LATCH]] ], [ [[SHFT_1]], [[LATCH_1]] ], [ [[SHFT_2]], [[LATCH_2]] ], [ [[SHFT_3]], [[LATCH_3]] ]
+; CHECK-NEXT:    ret i64 [[SHFTPHI]]
+; CHECK:       exitsucc:
+; CHECK-NEXT:    ret i64 96
 ;
 entry:
   br label %preheader
@@ -291,24 +291,6 @@ define void @test4(i16 %c3) {
 ; CHECK:       latch:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    br label [[EXITING_1:%.*]]
-; CHECK:       latchexit.unr-lcssa:
-; CHECK-NEXT:    br label [[LATCHEXIT]]
-; CHECK:       latchexit:
-; CHECK-NEXT:    ret void
-; CHECK:       default.loopexit.loopexit:
-; CHECK-NEXT:    br label [[DEFAULT_LOOPEXIT:%.*]]
-; CHECK:       default.loopexit.loopexit1:
-; CHECK-NEXT:    br label [[DEFAULT_LOOPEXIT]]
-; CHECK:       default.loopexit:
-; CHECK-NEXT:    br label [[DEFAULT:%.*]]
-; CHECK:       default:
-; CHECK-NEXT:    ret void
-; CHECK:       otherexit.loopexit:
-; CHECK-NEXT:    br label [[OTHEREXIT:%.*]]
-; CHECK:       otherexit.loopexit2:
-; CHECK-NEXT:    br label [[OTHEREXIT]]
-; CHECK:       otherexit:
-; CHECK-NEXT:    br label [[DEFAULT]]
 ; CHECK:       exiting.1:
 ; CHECK-NEXT:    switch i16 [[C3]], label [[DEFAULT_LOOPEXIT_LOOPEXIT]] [
 ; CHECK-NEXT:    i16 45, label [[OTHEREXIT_LOOPEXIT]]
@@ -334,6 +316,24 @@ define void @test4(i16 %c3) {
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
 ; CHECK-NEXT:    [[C2_3:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT_3]], [[C1]]
 ; CHECK-NEXT:    br i1 [[C2_3]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       latchexit.unr-lcssa:
+; CHECK-NEXT:    br label [[LATCHEXIT]]
+; CHECK:       latchexit:
+; CHECK-NEXT:    ret void
+; CHECK:       default.loopexit.loopexit:
+; CHECK-NEXT:    br label [[DEFAULT_LOOPEXIT:%.*]]
+; CHECK:       default.loopexit.loopexit1:
+; CHECK-NEXT:    br label [[DEFAULT_LOOPEXIT]]
+; CHECK:       default.loopexit:
+; CHECK-NEXT:    br label [[DEFAULT:%.*]]
+; CHECK:       default:
+; CHECK-NEXT:    ret void
+; CHECK:       otherexit.loopexit:
+; CHECK-NEXT:    br label [[OTHEREXIT:%.*]]
+; CHECK:       otherexit.loopexit2:
+; CHECK-NEXT:    br label [[OTHEREXIT]]
+; CHECK:       otherexit:
+; CHECK-NEXT:    br label [[DEFAULT]]
 ;
 preheader:
   %c1 = zext i32 undef to i64
@@ -426,45 +426,6 @@ define void @test5(i1 %c) {
 ; CHECK-NEXT:    br i1 [[C]], label [[INNERLATCH:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT:%.*]]
 ; CHECK:       innerLatch:
 ; CHECK-NEXT:    br i1 false, label [[INNERH_1:%.*]], label [[OUTERLATCH:%.*]]
-; CHECK:       outerLatch:
-; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[TMP4]], 1
-; CHECK-NEXT:    br label [[INNERH_13:%.*]]
-; CHECK:       outerLatchExit.loopexit.unr-lcssa:
-; CHECK-NEXT:    br label [[OUTERLATCHEXIT_LOOPEXIT]]
-; CHECK:       outerLatchExit.loopexit:
-; CHECK-NEXT:    br label [[OUTERLATCHEXIT]]
-; CHECK:       outerLatchExit:
-; CHECK-NEXT:    ret void
-; CHECK:       exitB.loopexit.loopexit.loopexit:
-; CHECK-NEXT:    br label [[EXITB_LOOPEXIT_LOOPEXIT:%.*]]
-; CHECK:       exitB.loopexit.loopexit.loopexit13:
-; CHECK-NEXT:    br label [[EXITB_LOOPEXIT_LOOPEXIT]]
-; CHECK:       exitB.loopexit.loopexit.loopexit15:
-; CHECK-NEXT:    br label [[EXITB_LOOPEXIT_LOOPEXIT]]
-; CHECK:       exitB.loopexit.loopexit.loopexit17:
-; CHECK-NEXT:    br label [[EXITB_LOOPEXIT_LOOPEXIT]]
-; CHECK:       exitB.loopexit.loopexit:
-; CHECK-NEXT:    br label [[EXITB_LOOPEXIT:%.*]]
-; CHECK:       exitB.loopexit.loopexit2:
-; CHECK-NEXT:    br label [[EXITB_LOOPEXIT]]
-; CHECK:       exitB.loopexit:
-; CHECK-NEXT:    br label [[EXITB:%.*]]
-; CHECK:       exitB:
-; CHECK-NEXT:    ret void
-; CHECK:       otherexitB.loopexit.loopexit:
-; CHECK-NEXT:    br label [[OTHEREXITB_LOOPEXIT:%.*]]
-; CHECK:       otherexitB.loopexit.loopexit12:
-; CHECK-NEXT:    br label [[OTHEREXITB_LOOPEXIT]]
-; CHECK:       otherexitB.loopexit.loopexit14:
-; CHECK-NEXT:    br label [[OTHEREXITB_LOOPEXIT]]
-; CHECK:       otherexitB.loopexit.loopexit16:
-; CHECK-NEXT:    br label [[OTHEREXITB_LOOPEXIT]]
-; CHECK:       otherexitB.loopexit:
-; CHECK-NEXT:    br label [[OTHEREXITB:%.*]]
-; CHECK:       otherexitB.loopexit1:
-; CHECK-NEXT:    br label [[OTHEREXITB]]
-; CHECK:       otherexitB:
-; CHECK-NEXT:    br label [[EXITB]]
 ; CHECK:       innerH.1:
 ; CHECK-NEXT:    br i1 [[C]], label [[INNEREXITING_1:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT]]
 ; CHECK:       innerexiting.1:
@@ -483,6 +444,9 @@ define void @test5(i1 %c) {
 ; CHECK-NEXT:    br i1 [[C]], label [[INNERLATCH_3:%.*]], label [[EXITB_LOOPEXIT_LOOPEXIT_LOOPEXIT]]
 ; CHECK:       innerLatch.3:
 ; CHECK-NEXT:    br i1 false, label [[INNERH]], label [[OUTERLATCH]], !llvm.loop [[LOOP6]]
+; CHECK:       outerLatch:
+; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[TMP4]], 1
+; CHECK-NEXT:    br label [[INNERH_13:%.*]]
 ; CHECK:       innerH.13:
 ; CHECK-NEXT:    br i1 [[C]], label [[INNEREXITING_14:%.*]], label [[OTHEREXITB_LOOPEXIT_LOOPEXIT12:%.*]]
 ; CHECK:       innerexiting.14:
@@ -565,6 +529,42 @@ define void @test5(i1 %c) {
 ; CHECK-NEXT:    [[TMP6_3]] = add i32 [[TMP6_2]], 1
 ; CHECK-NEXT:    [[TMP7_3:%.*]] = icmp sgt i32 [[TMP6_3]], 79
 ; CHECK-NEXT:    br i1 [[TMP7_3]], label [[OUTERLATCHEXIT_LOOPEXIT_UNR_LCSSA:%.*]], label [[OUTERH]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK:       outerLatchExit.loopexit.unr-lcssa:
+; CHECK-NEXT:    br label [[OUTERLATCHEXIT_LOOPEXIT]]
+; CHECK:       outerLatchExit.loopexit:
+; CHECK-NEXT:    br label [[OUTERLATCHEXIT]]
+; CHECK:       outerLatchExit:
+; CHECK-NEXT:    ret void
+; CHECK:       exitB.loopexit.loopexit.loopexit:
+; CHECK-NEXT:    br label [[EXITB_LOOPEXIT_LOOPEXIT:%.*]]
+; CHECK:       exitB.loopexit.loopexit.loopexit13:
+; CHECK-NEXT:    br label [[EXITB_LOOPEXIT_LOOPEXIT]]
+; CHECK:       exitB.loopexit.loopexit.loopexit15:
+; CHECK-NEXT:    br label [[EXITB_LOOPEXIT_LOOPEXIT]]
+; CHECK:       exitB.loopexit.loopexit.loopexit17:
+; CHECK-NEXT:    br label [[EXITB_LOOPEXIT_LOOPEXIT]]
+; CHECK:       exitB.loopexit.loopexit:
+; CHECK-NEXT:    br label [[EXITB_LOOPEXIT:%.*]]
+; CHECK:       exitB.loopexit.loopexit2:
+; CHECK-NEXT:    br label [[EXITB_LOOPEXIT]]
+; CHECK:       exitB.loopexit:
+; CHECK-NEXT:    br label [[EXITB:%.*]]
+; CHECK:       exitB:
+; CHECK-NEXT:    ret void
+; CHECK:       otherexitB.loopexit.loopexit:
+; CHECK-NEXT:    br label [[OTHEREXITB_LOOPEXIT:%.*]]
+; CHECK:       otherexitB.loopexit.loopexit12:
+; CHECK-NEXT:    br label [[OTHEREXITB_LOOPEXIT]]
+; CHECK:       otherexitB.loopexit.loopexit14:
+; CHECK-NEXT:    br label [[OTHEREXITB_LOOPEXIT]]
+; CHECK:       otherexitB.loopexit.loopexit16:
+; CHECK-NEXT:    br label [[OTHEREXITB_LOOPEXIT]]
+; CHECK:       otherexitB.loopexit:
+; CHECK-NEXT:    br label [[OTHEREXITB:%.*]]
+; CHECK:       otherexitB.loopexit1:
+; CHECK-NEXT:    br label [[OTHEREXITB]]
+; CHECK:       otherexitB:
+; CHECK-NEXT:    br label [[EXITB]]
 ;
 bb:
   %tmp = icmp sgt i32 undef, 79
@@ -643,6 +643,16 @@ define void @test6(i1 %c) {
 ; CHECK:       latch:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nsw i64 [[INDVARS_IV]], 2
 ; CHECK-NEXT:    br i1 [[C]], label [[LATCH_1:%.*]], label [[OTHEREXIT_LOOPEXIT]]
+; CHECK:       latch.1:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nsw i64 [[INDVARS_IV_NEXT]], 2
+; CHECK-NEXT:    br i1 [[C]], label [[LATCH_2:%.*]], label [[OTHEREXIT_LOOPEXIT]]
+; CHECK:       latch.2:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_1]], 2
+; CHECK-NEXT:    br i1 [[C]], label [[LATCH_3]], label [[OTHEREXIT_LOOPEXIT]]
+; CHECK:       latch.3:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nsw i64 [[INDVARS_IV_NEXT_2]], 2
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_3]], 616
+; CHECK-NEXT:    br i1 [[TMP6]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       latchexit.unr-lcssa:
 ; CHECK-NEXT:    br label [[LATCHEXIT]]
 ; CHECK:       latchexit:
@@ -659,16 +669,6 @@ define void @test6(i1 %c) {
 ; CHECK-NEXT:    unreachable
 ; CHECK:       latchexitsucc:
 ; CHECK-NEXT:    br label [[NOT_ZERO44]]
-; CHECK:       latch.1:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nsw i64 [[INDVARS_IV_NEXT]], 2
-; CHECK-NEXT:    br i1 [[C]], label [[LATCH_2:%.*]], label [[OTHEREXIT_LOOPEXIT]]
-; CHECK:       latch.2:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_1]], 2
-; CHECK-NEXT:    br i1 [[C]], label [[LATCH_3]], label [[OTHEREXIT_LOOPEXIT]]
-; CHECK:       latch.3:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nsw i64 [[INDVARS_IV_NEXT_2]], 2
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_3]], 616
-; CHECK-NEXT:    br i1 [[TMP6]], label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP10:![0-9]+]]
 ;
 entry:
   br label %header

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
index 617e2e6d2765f..1b6dd4cf2838c 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
@@ -36,39 +36,6 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
 ; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.1, label %loop_exiting_bb1.1
-; EPILOG:       exit1.loopexit:
-; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1.loopexit1:
-; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1:
-; EPILOG-NEXT:    ret void
-; EPILOG:       exit2.loopexit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    br label %exit2.loopexit.unr-lcssa
-; EPILOG:       exit2.loopexit.unr-lcssa:
-; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
-; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2.loopexit
-; EPILOG:       loop_header.epil.preheader:
-; EPILOG-NEXT:    br label %loop_header.epil
-; EPILOG:       loop_header.epil:
-; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
-; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %loop_exiting_bb1.epil
-; EPILOG:       loop_exiting_bb1.epil:
-; EPILOG-NEXT:    br i1 false, label %loop_exiting_bb2.epil, label %exit1.loopexit1
-; EPILOG:       loop_exiting_bb2.epil:
-; EPILOG-NEXT:    br i1 false, label %loop_latch.epil, label %exit3.loopexit2
-; EPILOG:       loop_latch.epil:
-; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
-; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit2.loopexit.epilog-lcssa, !llvm.loop !0
-; EPILOG:       exit2.loopexit.epilog-lcssa:
-; EPILOG-NEXT:    br label %exit2.loopexit
-; EPILOG:       exit2.loopexit:
-; EPILOG-NEXT:    ret void
 ; EPILOG:       loop_exiting_bb1.1:
 ; EPILOG-NEXT:    br i1 false, label %loop_exiting_bb2.1, label %exit1.loopexit
 ; EPILOG:       loop_exiting_bb2.1:
@@ -126,6 +93,39 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit2.loopexit.unr-lcssa.loopexit
+; EPILOG:       exit1.loopexit:
+; EPILOG-NEXT:    br label %exit1
+; EPILOG:       exit1.loopexit1:
+; EPILOG-NEXT:    br label %exit1
+; EPILOG:       exit1:
+; EPILOG-NEXT:    ret void
+; EPILOG:       exit2.loopexit.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
+; EPILOG-NEXT:    br label %exit2.loopexit.unr-lcssa
+; EPILOG:       exit2.loopexit.unr-lcssa:
+; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2.loopexit
+; EPILOG:       loop_header.epil.preheader:
+; EPILOG-NEXT:    br label %loop_header.epil
+; EPILOG:       loop_header.epil:
+; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %loop_exiting_bb1.epil
+; EPILOG:       loop_exiting_bb1.epil:
+; EPILOG-NEXT:    br i1 false, label %loop_exiting_bb2.epil, label %exit1.loopexit1
+; EPILOG:       loop_exiting_bb2.epil:
+; EPILOG-NEXT:    br i1 false, label %loop_latch.epil, label %exit3.loopexit2
+; EPILOG:       loop_latch.epil:
+; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
+; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
+; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit2.loopexit.epilog-lcssa, !llvm.loop !0
+; EPILOG:       exit2.loopexit.epilog-lcssa:
+; EPILOG-NEXT:    br label %exit2.loopexit
+; EPILOG:       exit2.loopexit:
+; EPILOG-NEXT:    ret void
 ;
 ; EPILOG-BLOCK-LABEL: @test1(
 ; EPILOG-BLOCK-NEXT:  entry:
@@ -152,6 +152,15 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
 ; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.1, label %loop_exiting_bb1.1
+; EPILOG-BLOCK:       loop_exiting_bb1.1:
+; EPILOG-BLOCK-NEXT:    br i1 false, label %loop_exiting_bb2.1, label %exit1.loopexit
+; EPILOG-BLOCK:       loop_exiting_bb2.1:
+; EPILOG-BLOCK-NEXT:    br i1 false, label %loop_latch.1, label %exit3.loopexit
+; EPILOG-BLOCK:       loop_latch.1:
+; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.loopexit.unr-lcssa.loopexit, !llvm.loop !0
 ; EPILOG-BLOCK:       exit1.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit1
 ; EPILOG-BLOCK:       exit1:
@@ -173,15 +182,6 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br label %exit2.loopexit
 ; EPILOG-BLOCK:       exit2.loopexit:
 ; EPILOG-BLOCK-NEXT:    ret void
-; EPILOG-BLOCK:       loop_exiting_bb1.1:
-; EPILOG-BLOCK-NEXT:    br i1 false, label %loop_exiting_bb2.1, label %exit1.loopexit
-; EPILOG-BLOCK:       loop_exiting_bb2.1:
-; EPILOG-BLOCK-NEXT:    br i1 false, label %loop_latch.1, label %exit3.loopexit
-; EPILOG-BLOCK:       loop_latch.1:
-; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.loopexit.unr-lcssa.loopexit, !llvm.loop !0
 ;
 ; PROLOG-LABEL: @test1(
 ; PROLOG-NEXT:  entry:
@@ -230,16 +230,6 @@ define void @test1(i64 %trip, i1 %cond) {
 ; PROLOG:       loop_latch:
 ; PROLOG-NEXT:    %iv_next = add i64 %iv, 1
 ; PROLOG-NEXT:    br i1 %cond, label %loop_latch.1, label %loop_exiting_bb1.1
-; PROLOG:       exit1.loopexit:
-; PROLOG-NEXT:    br label %exit1
-; PROLOG:       exit1.loopexit1:
-; PROLOG-NEXT:    br label %exit1
-; PROLOG:       exit1:
-; PROLOG-NEXT:    ret void
-; PROLOG:       exit2.loopexit.unr-lcssa:
-; PROLOG-NEXT:    br label %exit2.loopexit
-; PROLOG:       exit2.loopexit:
-; PROLOG-NEXT:    ret void
 ; PROLOG:       loop_exiting_bb1.1:
 ; PROLOG-NEXT:    br i1 false, label %loop_exiting_bb2.1, label %exit1.loopexit
 ; PROLOG:       loop_exiting_bb2.1:
@@ -290,6 +280,16 @@ define void @test1(i64 %trip, i1 %cond) {
 ; PROLOG-NEXT:    %iv_next.7 = add i64 %iv_next.6, 1
 ; PROLOG-NEXT:    %cmp.7 = icmp ne i64 %iv_next.7, %trip
 ; PROLOG-NEXT:    br i1 %cmp.7, label %loop_header, label %exit2.loopexit.unr-lcssa
+; PROLOG:       exit1.loopexit:
+; PROLOG-NEXT:    br label %exit1
+; PROLOG:       exit1.loopexit1:
+; PROLOG-NEXT:    br label %exit1
+; PROLOG:       exit1:
+; PROLOG-NEXT:    ret void
+; PROLOG:       exit2.loopexit.unr-lcssa:
+; PROLOG-NEXT:    br label %exit2.loopexit
+; PROLOG:       exit2.loopexit:
+; PROLOG-NEXT:    ret void
 ;
 ; PROLOG-BLOCK-LABEL: @test1(
 ; PROLOG-BLOCK-NEXT:  entry:
@@ -327,14 +327,6 @@ define void @test1(i64 %trip, i1 %cond) {
 ; PROLOG-BLOCK:       loop_latch:
 ; PROLOG-BLOCK-NEXT:    %iv_next = add i64 %iv, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.1, label %loop_exiting_bb1.1
-; PROLOG-BLOCK:       exit1.loopexit:
-; PROLOG-BLOCK-NEXT:    br label %exit1
-; PROLOG-BLOCK:       exit1:
-; PROLOG-BLOCK-NEXT:    ret void
-; PROLOG-BLOCK:       exit2.loopexit.unr-lcssa:
-; PROLOG-BLOCK-NEXT:    br label %exit2.loopexit
-; PROLOG-BLOCK:       exit2.loopexit:
-; PROLOG-BLOCK-NEXT:    ret void
 ; PROLOG-BLOCK:       loop_exiting_bb1.1:
 ; PROLOG-BLOCK-NEXT:    br i1 false, label %loop_exiting_bb2.1, label %exit1.loopexit
 ; PROLOG-BLOCK:       loop_exiting_bb2.1:
@@ -343,6 +335,14 @@ define void @test1(i64 %trip, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
 ; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp ne i64 %iv_next.1, %trip
 ; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %loop_header, label %exit2.loopexit.unr-lcssa, !llvm.loop !0
+; PROLOG-BLOCK:       exit1.loopexit:
+; PROLOG-BLOCK-NEXT:    br label %exit1
+; PROLOG-BLOCK:       exit1:
+; PROLOG-BLOCK-NEXT:    ret void
+; PROLOG-BLOCK:       exit2.loopexit.unr-lcssa:
+; PROLOG-BLOCK-NEXT:    br label %exit2.loopexit
+; PROLOG-BLOCK:       exit2.loopexit:
+; PROLOG-BLOCK-NEXT:    ret void
 ;
 
 entry:
@@ -404,58 +404,13 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 ; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1
-; EPILOG:       for.end.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %sum.0.lcssa.ph.ph = phi i32 [ %add.7, %for.body.7 ]
-; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %for.body.7 ]
-; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %for.body.7 ]
-; EPILOG-NEXT:    br label %for.end.unr-lcssa
-; EPILOG:       for.end.unr-lcssa:
-; EPILOG-NEXT:    %sum.0.lcssa.ph = phi i32 [ undef, %entry ], [ %sum.0.lcssa.ph.ph, %for.end.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %for.end.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %for.end.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
-; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %for.end
-; EPILOG:       header.epil.preheader:
-; EPILOG-NEXT:    br label %header.epil
-; EPILOG:       header.epil:
-; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %for.body.epil ], [ %sum.02.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %for.body.epil ]
-; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit2, label %for.exiting_block.epil
-; EPILOG:       for.exiting_block.epil:
-; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
-; EPILOG-NEXT:    br i1 %cmp.epil, label %for.exit2.loopexit2, label %for.body.epil
-; EPILOG:       for.body.epil:
-; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
-; EPILOG-NEXT:    %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-NEXT:    %add.epil = add nsw i32 %3, %sum.02.epil
-; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %for.end.epilog-lcssa, !llvm.loop !2
-; EPILOG:       for.end.epilog-lcssa:
-; EPILOG-NEXT:    %sum.0.lcssa.ph1 = phi i32 [ %add.epil, %for.body.epil ]
-; EPILOG-NEXT:    br label %for.end
-; EPILOG:       for.end:
-; EPILOG-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %for.end.unr-lcssa ], [ %sum.0.lcssa.ph1, %for.end.epilog-lcssa ]
-; EPILOG-NEXT:    ret i32 %sum.0.lcssa
-; EPILOG:       for.exit2.loopexit:
-; EPILOG-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %for.body.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %for.body.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %for.body.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %for.body.6 ], [ 42, %for.exiting_block.7 ]
-; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2.loopexit2:
-; EPILOG-NEXT:    %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
-; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2:
-; EPILOG-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
-; EPILOG-NEXT:    ret i32 %retval
 ; EPILOG:       for.exiting_block.1:
 ; EPILOG-NEXT:    %cmp.1 = icmp eq i64 %n, 42
 ; EPILOG-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %for.body.1
 ; EPILOG:       for.body.1:
 ; EPILOG-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-; EPILOG-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
-; EPILOG-NEXT:    %add.1 = add nsw i32 %4, %add
+; EPILOG-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
+; EPILOG-NEXT:    %add.1 = add nsw i32 %3, %add
 ; EPILOG-NEXT:    %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
 ; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.2
@@ -464,8 +419,8 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    br i1 %cmp.2, label %for.exit2.loopexit, label %for.body.2
 ; EPILOG:       for.body.2:
 ; EPILOG-NEXT:    %arrayidx.2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.1
-; EPILOG-NEXT:    %5 = load i32, i32* %arrayidx.2, align 4
-; EPILOG-NEXT:    %add.2 = add nsw i32 %5, %add.1
+; EPILOG-NEXT:    %4 = load i32, i32* %arrayidx.2, align 4
+; EPILOG-NEXT:    %add.2 = add nsw i32 %4, %add.1
 ; EPILOG-NEXT:    %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1
 ; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.3
@@ -474,8 +429,8 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    br i1 %cmp.3, label %for.exit2.loopexit, label %for.body.3
 ; EPILOG:       for.body.3:
 ; EPILOG-NEXT:    %arrayidx.3 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.2
-; EPILOG-NEXT:    %6 = load i32, i32* %arrayidx.3, align 4
-; EPILOG-NEXT:    %add.3 = add nsw i32 %6, %add.2
+; EPILOG-NEXT:    %5 = load i32, i32* %arrayidx.3, align 4
+; EPILOG-NEXT:    %add.3 = add nsw i32 %5, %add.2
 ; EPILOG-NEXT:    %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1
 ; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.4
@@ -484,8 +439,8 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    br i1 %cmp.4, label %for.exit2.loopexit, label %for.body.4
 ; EPILOG:       for.body.4:
 ; EPILOG-NEXT:    %arrayidx.4 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.3
-; EPILOG-NEXT:    %7 = load i32, i32* %arrayidx.4, align 4
-; EPILOG-NEXT:    %add.4 = add nsw i32 %7, %add.3
+; EPILOG-NEXT:    %6 = load i32, i32* %arrayidx.4, align 4
+; EPILOG-NEXT:    %add.4 = add nsw i32 %6, %add.3
 ; EPILOG-NEXT:    %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1
 ; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.5
@@ -494,8 +449,8 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    br i1 %cmp.5, label %for.exit2.loopexit, label %for.body.5
 ; EPILOG:       for.body.5:
 ; EPILOG-NEXT:    %arrayidx.5 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.4
-; EPILOG-NEXT:    %8 = load i32, i32* %arrayidx.5, align 4
-; EPILOG-NEXT:    %add.5 = add nsw i32 %8, %add.4
+; EPILOG-NEXT:    %7 = load i32, i32* %arrayidx.5, align 4
+; EPILOG-NEXT:    %add.5 = add nsw i32 %7, %add.4
 ; EPILOG-NEXT:    %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1
 ; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.6
@@ -504,8 +459,8 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    br i1 %cmp.6, label %for.exit2.loopexit, label %for.body.6
 ; EPILOG:       for.body.6:
 ; EPILOG-NEXT:    %arrayidx.6 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.5
-; EPILOG-NEXT:    %9 = load i32, i32* %arrayidx.6, align 4
-; EPILOG-NEXT:    %add.6 = add nsw i32 %9, %add.5
+; EPILOG-NEXT:    %8 = load i32, i32* %arrayidx.6, align 4
+; EPILOG-NEXT:    %add.6 = add nsw i32 %8, %add.5
 ; EPILOG-NEXT:    %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1
 ; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.7
@@ -514,12 +469,57 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    br i1 %cmp.7, label %for.exit2.loopexit, label %for.body.7
 ; EPILOG:       for.body.7:
 ; EPILOG-NEXT:    %arrayidx.7 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.6
-; EPILOG-NEXT:    %10 = load i32, i32* %arrayidx.7, align 4
-; EPILOG-NEXT:    %add.7 = add nsw i32 %10, %add.6
+; EPILOG-NEXT:    %9 = load i32, i32* %arrayidx.7, align 4
+; EPILOG-NEXT:    %add.7 = add nsw i32 %9, %add.6
 ; EPILOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv.next.6, 1
 ; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %for.end.unr-lcssa.loopexit, label %header
+; EPILOG:       for.end.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %sum.0.lcssa.ph.ph = phi i32 [ %add.7, %for.body.7 ]
+; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %for.body.7 ]
+; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %for.body.7 ]
+; EPILOG-NEXT:    br label %for.end.unr-lcssa
+; EPILOG:       for.end.unr-lcssa:
+; EPILOG-NEXT:    %sum.0.lcssa.ph = phi i32 [ undef, %entry ], [ %sum.0.lcssa.ph.ph, %for.end.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %for.end.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %for.end.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %for.end
+; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    br label %header.epil
+; EPILOG:       header.epil:
+; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %for.body.epil ], [ %sum.02.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %for.body.epil ]
+; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit2, label %for.exiting_block.epil
+; EPILOG:       for.exiting_block.epil:
+; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
+; EPILOG-NEXT:    br i1 %cmp.epil, label %for.exit2.loopexit2, label %for.body.epil
+; EPILOG:       for.body.epil:
+; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-NEXT:    %10 = load i32, i32* %arrayidx.epil, align 4
+; EPILOG-NEXT:    %add.epil = add nsw i32 %10, %sum.02.epil
+; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
+; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
+; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %for.end.epilog-lcssa, !llvm.loop !2
+; EPILOG:       for.end.epilog-lcssa:
+; EPILOG-NEXT:    %sum.0.lcssa.ph1 = phi i32 [ %add.epil, %for.body.epil ]
+; EPILOG-NEXT:    br label %for.end
+; EPILOG:       for.end:
+; EPILOG-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %for.end.unr-lcssa ], [ %sum.0.lcssa.ph1, %for.end.epilog-lcssa ]
+; EPILOG-NEXT:    ret i32 %sum.0.lcssa
+; EPILOG:       for.exit2.loopexit:
+; EPILOG-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %for.body.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %for.body.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %for.body.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %for.body.6 ], [ 42, %for.exiting_block.7 ]
+; EPILOG-NEXT:    br label %for.exit2
+; EPILOG:       for.exit2.loopexit2:
+; EPILOG-NEXT:    %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
+; EPILOG-NEXT:    br label %for.exit2
+; EPILOG:       for.exit2:
+; EPILOG-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
+; EPILOG-NEXT:    ret i32 %retval
 ;
 ; EPILOG-BLOCK-LABEL: @test2(
 ; EPILOG-BLOCK-NEXT:  entry:
@@ -545,6 +545,17 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 ; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1
+; EPILOG-BLOCK:       for.exiting_block.1:
+; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
+; EPILOG-BLOCK-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %for.body.1
+; EPILOG-BLOCK:       for.body.1:
+; EPILOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
+; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
+; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %3, %add
+; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %for.end.unr-lcssa.loopexit, label %header, !llvm.loop !2
 ; EPILOG-BLOCK:       for.end.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %sum.0.lcssa.ph.ph = phi i32 [ %add.1, %for.body.1 ]
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.1, %for.body.1 ]
@@ -565,8 +576,8 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %for.exit2, label %for.body.epil
 ; EPILOG-BLOCK:       for.body.epil:
 ; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
-; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %3, %sum.02.unr
+; EPILOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.epil, align 4
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %4, %sum.02.unr
 ; EPILOG-BLOCK-NEXT:    br label %for.end
 ; EPILOG-BLOCK:       for.end:
 ; EPILOG-BLOCK-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %for.end.unr-lcssa ], [ %add.epil, %for.body.epil ]
@@ -577,17 +588,6 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-BLOCK:       for.exit2:
 ; EPILOG-BLOCK-NEXT:    %retval = phi i32 [ %sum.02.unr, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ]
 ; EPILOG-BLOCK-NEXT:    ret i32 %retval
-; EPILOG-BLOCK:       for.exiting_block.1:
-; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
-; EPILOG-BLOCK-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %for.body.1
-; EPILOG-BLOCK:       for.body.1:
-; EPILOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-; EPILOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
-; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %4, %add
-; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %for.end.unr-lcssa.loopexit, label %header, !llvm.loop !2
 ;
 ; PROLOG-LABEL: @test2(
 ; PROLOG-NEXT:  entry:
@@ -640,21 +640,6 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; PROLOG-NEXT:    %add = add nsw i32 %3, %sum.02
 ; PROLOG-NEXT:    %indvars.iv.next = add i64 %indvars.iv, 1
 ; PROLOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1
-; PROLOG:       for.end.unr-lcssa:
-; PROLOG-NEXT:    %sum.0.lcssa.ph = phi i32 [ %add.7, %for.body.7 ]
-; PROLOG-NEXT:    br label %for.end
-; PROLOG:       for.end:
-; PROLOG-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.unr, %header.prol.loopexit ], [ %sum.0.lcssa.ph, %for.end.unr-lcssa ]
-; PROLOG-NEXT:    ret i32 %sum.0.lcssa
-; PROLOG:       for.exit2.loopexit:
-; PROLOG-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %for.body.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %for.body.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %for.body.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %for.body.6 ], [ 42, %for.exiting_block.7 ]
-; PROLOG-NEXT:    br label %for.exit2
-; PROLOG:       for.exit2.loopexit1:
-; PROLOG-NEXT:    %retval.ph2 = phi i32 [ 42, %for.exiting_block.prol ], [ %sum.02.prol, %header.prol ]
-; PROLOG-NEXT:    br label %for.exit2
-; PROLOG:       for.exit2:
-; PROLOG-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph2, %for.exit2.loopexit1 ]
-; PROLOG-NEXT:    ret i32 %retval
 ; PROLOG:       for.exiting_block.1:
 ; PROLOG-NEXT:    %cmp.1 = icmp eq i64 %n, 42
 ; PROLOG-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %for.body.1
@@ -719,6 +704,21 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; PROLOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv.next.6, 1
 ; PROLOG-NEXT:    %exitcond.7 = icmp eq i64 %indvars.iv.next.7, %n
 ; PROLOG-NEXT:    br i1 %exitcond.7, label %for.end.unr-lcssa, label %header
+; PROLOG:       for.end.unr-lcssa:
+; PROLOG-NEXT:    %sum.0.lcssa.ph = phi i32 [ %add.7, %for.body.7 ]
+; PROLOG-NEXT:    br label %for.end
+; PROLOG:       for.end:
+; PROLOG-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.unr, %header.prol.loopexit ], [ %sum.0.lcssa.ph, %for.end.unr-lcssa ]
+; PROLOG-NEXT:    ret i32 %sum.0.lcssa
+; PROLOG:       for.exit2.loopexit:
+; PROLOG-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %for.body.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %for.body.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %for.body.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %for.body.6 ], [ 42, %for.exiting_block.7 ]
+; PROLOG-NEXT:    br label %for.exit2
+; PROLOG:       for.exit2.loopexit1:
+; PROLOG-NEXT:    %retval.ph2 = phi i32 [ 42, %for.exiting_block.prol ], [ %sum.02.prol, %header.prol ]
+; PROLOG-NEXT:    br label %for.exit2
+; PROLOG:       for.exit2:
+; PROLOG-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph2, %for.exit2.loopexit1 ]
+; PROLOG-NEXT:    ret i32 %retval
 ;
 ; PROLOG-BLOCK-LABEL: @test2(
 ; PROLOG-BLOCK-NEXT:  entry:
@@ -757,18 +757,6 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; PROLOG-BLOCK-NEXT:    %add = add nsw i32 %3, %sum.02
 ; PROLOG-BLOCK-NEXT:    %indvars.iv.next = add i64 %indvars.iv, 1
 ; PROLOG-BLOCK-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1
-; PROLOG-BLOCK:       for.end.unr-lcssa:
-; PROLOG-BLOCK-NEXT:    %sum.0.lcssa.ph = phi i32 [ %add.1, %for.body.1 ]
-; PROLOG-BLOCK-NEXT:    br label %for.end
-; PROLOG-BLOCK:       for.end:
-; PROLOG-BLOCK-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.unr, %header.prol.loopexit ], [ %sum.0.lcssa.ph, %for.end.unr-lcssa ]
-; PROLOG-BLOCK-NEXT:    ret i32 %sum.0.lcssa
-; PROLOG-BLOCK:       for.exit2.loopexit:
-; PROLOG-BLOCK-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ]
-; PROLOG-BLOCK-NEXT:    br label %for.exit2
-; PROLOG-BLOCK:       for.exit2:
-; PROLOG-BLOCK-NEXT:    %retval = phi i32 [ 0, %header.prol ], [ 42, %for.exiting_block.prol ], [ %retval.ph, %for.exit2.loopexit ]
-; PROLOG-BLOCK-NEXT:    ret i32 %retval
 ; PROLOG-BLOCK:       for.exiting_block.1:
 ; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
 ; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %for.body.1
@@ -779,6 +767,18 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; PROLOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
 ; PROLOG-BLOCK-NEXT:    %exitcond.1 = icmp eq i64 %indvars.iv.next.1, %n
 ; PROLOG-BLOCK-NEXT:    br i1 %exitcond.1, label %for.end.unr-lcssa, label %header, !llvm.loop !2
+; PROLOG-BLOCK:       for.end.unr-lcssa:
+; PROLOG-BLOCK-NEXT:    %sum.0.lcssa.ph = phi i32 [ %add.1, %for.body.1 ]
+; PROLOG-BLOCK-NEXT:    br label %for.end
+; PROLOG-BLOCK:       for.end:
+; PROLOG-BLOCK-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.unr, %header.prol.loopexit ], [ %sum.0.lcssa.ph, %for.end.unr-lcssa ]
+; PROLOG-BLOCK-NEXT:    ret i32 %sum.0.lcssa
+; PROLOG-BLOCK:       for.exit2.loopexit:
+; PROLOG-BLOCK-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ]
+; PROLOG-BLOCK-NEXT:    br label %for.exit2
+; PROLOG-BLOCK:       for.exit2:
+; PROLOG-BLOCK-NEXT:    %retval = phi i32 [ 0, %header.prol ], [ 42, %for.exiting_block.prol ], [ %retval.ph, %for.exit2.loopexit ]
+; PROLOG-BLOCK-NEXT:    ret i32 %retval
 ;
 
 
@@ -844,44 +844,6 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG-NEXT:    %sum.next = add i64 %sum, %add
 ; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-NEXT:    br i1 undef, label %loop_latch.1, label %loop_exiting_bb1.1
-; EPILOG:       exit1.loopexit:
-; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1.loopexit1:
-; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1:
-; EPILOG-NEXT:    ret void
-; EPILOG:       exit2.loopexit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    %sum.unr.ph = phi i64 [ %sum.next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    br label %exit2.loopexit.unr-lcssa
-; EPILOG:       exit2.loopexit.unr-lcssa:
-; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %sum.unr = phi i64 [ 0, %entry ], [ %sum.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
-; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2.loopexit
-; EPILOG:       loop_header.epil.preheader:
-; EPILOG-NEXT:    br label %loop_header.epil
-; EPILOG:       loop_header.epil:
-; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %sum.epil = phi i64 [ %sum.unr, %loop_header.epil.preheader ], [ %sum.next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
-; EPILOG-NEXT:    br i1 undef, label %loop_latch.epil, label %loop_exiting_bb1.epil
-; EPILOG:       loop_exiting_bb1.epil:
-; EPILOG-NEXT:    switch i64 %sum.epil, label %loop_latch.epil [
-; EPILOG-NEXT:    i64 24, label %exit1.loopexit1
-; EPILOG-NEXT:    i64 42, label %exit3.loopexit2
-; EPILOG-NEXT:    ]
-; EPILOG:       loop_latch.epil:
-; EPILOG-NEXT:    %iv_next.epil = add nuw nsw i64 %iv.epil, 1
-; EPILOG-NEXT:    %sum.next.epil = add i64 %sum.epil, %add
-; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit2.loopexit.epilog-lcssa, !llvm.loop !3
-; EPILOG:       exit2.loopexit.epilog-lcssa:
-; EPILOG-NEXT:    br label %exit2.loopexit
-; EPILOG:       exit2.loopexit:
-; EPILOG-NEXT:    ret void
 ; EPILOG:       loop_exiting_bb1.1:
 ; EPILOG-NEXT:    switch i64 %sum.next, label %loop_latch.1 [
 ; EPILOG-NEXT:    i64 24, label %exit1.loopexit
@@ -953,6 +915,44 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit2.loopexit.unr-lcssa.loopexit
+; EPILOG:       exit1.loopexit:
+; EPILOG-NEXT:    br label %exit1
+; EPILOG:       exit1.loopexit1:
+; EPILOG-NEXT:    br label %exit1
+; EPILOG:       exit1:
+; EPILOG-NEXT:    ret void
+; EPILOG:       exit2.loopexit.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
+; EPILOG-NEXT:    %sum.unr.ph = phi i64 [ %sum.next.7, %loop_latch.7 ]
+; EPILOG-NEXT:    br label %exit2.loopexit.unr-lcssa
+; EPILOG:       exit2.loopexit.unr-lcssa:
+; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %sum.unr = phi i64 [ 0, %entry ], [ %sum.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2.loopexit
+; EPILOG:       loop_header.epil.preheader:
+; EPILOG-NEXT:    br label %loop_header.epil
+; EPILOG:       loop_header.epil:
+; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %sum.epil = phi i64 [ %sum.unr, %loop_header.epil.preheader ], [ %sum.next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    br i1 undef, label %loop_latch.epil, label %loop_exiting_bb1.epil
+; EPILOG:       loop_exiting_bb1.epil:
+; EPILOG-NEXT:    switch i64 %sum.epil, label %loop_latch.epil [
+; EPILOG-NEXT:    i64 24, label %exit1.loopexit1
+; EPILOG-NEXT:    i64 42, label %exit3.loopexit2
+; EPILOG-NEXT:    ]
+; EPILOG:       loop_latch.epil:
+; EPILOG-NEXT:    %iv_next.epil = add nuw nsw i64 %iv.epil, 1
+; EPILOG-NEXT:    %sum.next.epil = add i64 %sum.epil, %add
+; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
+; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit2.loopexit.epilog-lcssa, !llvm.loop !3
+; EPILOG:       exit2.loopexit.epilog-lcssa:
+; EPILOG-NEXT:    br label %exit2.loopexit
+; EPILOG:       exit2.loopexit:
+; EPILOG-NEXT:    ret void
 ;
 ; EPILOG-BLOCK-LABEL: @test3(
 ; EPILOG-BLOCK-NEXT:  entry:
@@ -982,6 +982,17 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG-BLOCK-NEXT:    %sum.next = add i64 %sum, %add
 ; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 undef, label %loop_latch.1, label %loop_exiting_bb1.1
+; EPILOG-BLOCK:       loop_exiting_bb1.1:
+; EPILOG-BLOCK-NEXT:    switch i64 %sum.next, label %loop_latch.1 [
+; EPILOG-BLOCK-NEXT:    i64 24, label %exit1.loopexit
+; EPILOG-BLOCK-NEXT:    i64 42, label %exit3.loopexit
+; EPILOG-BLOCK-NEXT:    ]
+; EPILOG-BLOCK:       loop_latch.1:
+; EPILOG-BLOCK-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
+; EPILOG-BLOCK-NEXT:    %sum.next.1 = add i64 %sum.next, %add
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.loopexit.unr-lcssa.loopexit, !llvm.loop !3
 ; EPILOG-BLOCK:       exit1.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit1
 ; EPILOG-BLOCK:       exit1:
@@ -1006,17 +1017,6 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG-BLOCK-NEXT:    br label %exit2.loopexit
 ; EPILOG-BLOCK:       exit2.loopexit:
 ; EPILOG-BLOCK-NEXT:    ret void
-; EPILOG-BLOCK:       loop_exiting_bb1.1:
-; EPILOG-BLOCK-NEXT:    switch i64 %sum.next, label %loop_latch.1 [
-; EPILOG-BLOCK-NEXT:    i64 24, label %exit1.loopexit
-; EPILOG-BLOCK-NEXT:    i64 42, label %exit3.loopexit
-; EPILOG-BLOCK-NEXT:    ]
-; EPILOG-BLOCK:       loop_latch.1:
-; EPILOG-BLOCK-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
-; EPILOG-BLOCK-NEXT:    %sum.next.1 = add i64 %sum.next, %add
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.loopexit.unr-lcssa.loopexit, !llvm.loop !3
 ;
 ; PROLOG-LABEL: @test3(
 ; PROLOG-NEXT:  entry:
@@ -1073,16 +1073,6 @@ define void @test3(i64 %trip, i64 %add) {
 ; PROLOG-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
 ; PROLOG-NEXT:    %sum.next = add i64 %sum, %add
 ; PROLOG-NEXT:    br i1 undef, label %loop_latch.1, label %loop_exiting_bb1.1
-; PROLOG:       exit1.loopexit:
-; PROLOG-NEXT:    br label %exit1
-; PROLOG:       exit1.loopexit1:
-; PROLOG-NEXT:    br label %exit1
-; PROLOG:       exit1:
-; PROLOG-NEXT:    ret void
-; PROLOG:       exit2.loopexit.unr-lcssa:
-; PROLOG-NEXT:    br label %exit2.loopexit
-; PROLOG:       exit2.loopexit:
-; PROLOG-NEXT:    ret void
 ; PROLOG:       loop_exiting_bb1.1:
 ; PROLOG-NEXT:    switch i64 %sum.next, label %loop_latch.1 [
 ; PROLOG-NEXT:    i64 24, label %exit1.loopexit
@@ -1147,6 +1137,16 @@ define void @test3(i64 %trip, i64 %add) {
 ; PROLOG-NEXT:    %sum.next.7 = add i64 %sum.next.6, %add
 ; PROLOG-NEXT:    %cmp.7 = icmp ne i64 %iv_next.7, %trip
 ; PROLOG-NEXT:    br i1 %cmp.7, label %loop_header, label %exit2.loopexit.unr-lcssa
+; PROLOG:       exit1.loopexit:
+; PROLOG-NEXT:    br label %exit1
+; PROLOG:       exit1.loopexit1:
+; PROLOG-NEXT:    br label %exit1
+; PROLOG:       exit1:
+; PROLOG-NEXT:    ret void
+; PROLOG:       exit2.loopexit.unr-lcssa:
+; PROLOG-NEXT:    br label %exit2.loopexit
+; PROLOG:       exit2.loopexit:
+; PROLOG-NEXT:    ret void
 ;
 ; PROLOG-BLOCK-LABEL: @test3(
 ; PROLOG-BLOCK-NEXT:  entry:
@@ -1189,14 +1189,6 @@ define void @test3(i64 %trip, i64 %add) {
 ; PROLOG-BLOCK-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
 ; PROLOG-BLOCK-NEXT:    %sum.next = add i64 %sum, %add
 ; PROLOG-BLOCK-NEXT:    br i1 undef, label %loop_latch.1, label %loop_exiting_bb1.1
-; PROLOG-BLOCK:       exit1.loopexit:
-; PROLOG-BLOCK-NEXT:    br label %exit1
-; PROLOG-BLOCK:       exit1:
-; PROLOG-BLOCK-NEXT:    ret void
-; PROLOG-BLOCK:       exit2.loopexit.unr-lcssa:
-; PROLOG-BLOCK-NEXT:    br label %exit2.loopexit
-; PROLOG-BLOCK:       exit2.loopexit:
-; PROLOG-BLOCK-NEXT:    ret void
 ; PROLOG-BLOCK:       loop_exiting_bb1.1:
 ; PROLOG-BLOCK-NEXT:    switch i64 %sum.next, label %loop_latch.1 [
 ; PROLOG-BLOCK-NEXT:    i64 24, label %exit1.loopexit
@@ -1207,6 +1199,14 @@ define void @test3(i64 %trip, i64 %add) {
 ; PROLOG-BLOCK-NEXT:    %sum.next.1 = add i64 %sum.next, %add
 ; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp ne i64 %iv_next.1, %trip
 ; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %loop_header, label %exit2.loopexit.unr-lcssa, !llvm.loop !3
+; PROLOG-BLOCK:       exit1.loopexit:
+; PROLOG-BLOCK-NEXT:    br label %exit1
+; PROLOG-BLOCK:       exit1:
+; PROLOG-BLOCK-NEXT:    ret void
+; PROLOG-BLOCK:       exit2.loopexit.unr-lcssa:
+; PROLOG-BLOCK-NEXT:    br label %exit2.loopexit
+; PROLOG-BLOCK:       exit2.loopexit:
+; PROLOG-BLOCK-NEXT:    ret void
 ;
 
 entry:
@@ -1266,61 +1266,13 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 ; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.1
-; EPILOG:       latchExit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %result.ph.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %latch.7 ]
-; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    br label %latchExit.unr-lcssa
-; EPILOG:       latchExit.unr-lcssa:
-; EPILOG-NEXT:    %result.ph = phi i32 [ undef, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
-; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
-; EPILOG:       header.epil.preheader:
-; EPILOG-NEXT:    br label %header.epil
-; EPILOG:       header.epil:
-; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
-; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit2, label %for.exiting_block.epil
-; EPILOG:       for.exiting_block.epil:
-; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
-; EPILOG-NEXT:    br i1 %cmp.epil, label %for.exit2.loopexit4, label %latch.epil
-; EPILOG:       latch.epil:
-; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
-; EPILOG-NEXT:    %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-NEXT:    %add.epil = add nsw i32 %3, %sum.02.epil
-; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit2, !llvm.loop !4
-; EPILOG:       latchExit.epilog-lcssa.loopexit:
-; EPILOG-NEXT:    %result.ph1.ph = phi i32 [ 0, %header ], [ 0, %latch ], [ 0, %latch.1 ], [ 0, %latch.2 ], [ 0, %latch.3 ], [ 0, %latch.4 ], [ 0, %latch.5 ], [ 0, %latch.6 ]
-; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
-; EPILOG:       latchExit.epilog-lcssa.loopexit2:
-; EPILOG-NEXT:    %result.ph1.ph3 = phi i32 [ 0, %header.epil ], [ %add.epil, %latch.epil ]
-; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
-; EPILOG:       latchExit.epilog-lcssa:
-; EPILOG-NEXT:    %result.ph1 = phi i32 [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %result.ph1.ph3, %latchExit.epilog-lcssa.loopexit2 ]
-; EPILOG-NEXT:    br label %latchExit
-; EPILOG:       latchExit:
-; EPILOG-NEXT:    %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
-; EPILOG-NEXT:    ret i32 %result
-; EPILOG:       for.exit2.loopexit:
-; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2.loopexit4:
-; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2:
-; EPILOG-NEXT:    ret i32 42
 ; EPILOG:       for.exiting_block.1:
 ; EPILOG-NEXT:    %cmp.1 = icmp eq i64 %n, 42
 ; EPILOG-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1
 ; EPILOG:       latch.1:
 ; EPILOG-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-; EPILOG-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
-; EPILOG-NEXT:    %add.1 = add nsw i32 %4, %add
+; EPILOG-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
+; EPILOG-NEXT:    %add.1 = add nsw i32 %3, %add
 ; EPILOG-NEXT:    %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
 ; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
 ; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.2
@@ -1329,8 +1281,8 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.2, label %for.exit2.loopexit, label %latch.2
 ; EPILOG:       latch.2:
 ; EPILOG-NEXT:    %arrayidx.2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.1
-; EPILOG-NEXT:    %5 = load i32, i32* %arrayidx.2, align 4
-; EPILOG-NEXT:    %add.2 = add nsw i32 %5, %add.1
+; EPILOG-NEXT:    %4 = load i32, i32* %arrayidx.2, align 4
+; EPILOG-NEXT:    %add.2 = add nsw i32 %4, %add.1
 ; EPILOG-NEXT:    %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1
 ; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
 ; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.3
@@ -1339,8 +1291,8 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.3, label %for.exit2.loopexit, label %latch.3
 ; EPILOG:       latch.3:
 ; EPILOG-NEXT:    %arrayidx.3 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.2
-; EPILOG-NEXT:    %6 = load i32, i32* %arrayidx.3, align 4
-; EPILOG-NEXT:    %add.3 = add nsw i32 %6, %add.2
+; EPILOG-NEXT:    %5 = load i32, i32* %arrayidx.3, align 4
+; EPILOG-NEXT:    %add.3 = add nsw i32 %5, %add.2
 ; EPILOG-NEXT:    %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1
 ; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
 ; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.4
@@ -1349,8 +1301,8 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.4, label %for.exit2.loopexit, label %latch.4
 ; EPILOG:       latch.4:
 ; EPILOG-NEXT:    %arrayidx.4 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.3
-; EPILOG-NEXT:    %7 = load i32, i32* %arrayidx.4, align 4
-; EPILOG-NEXT:    %add.4 = add nsw i32 %7, %add.3
+; EPILOG-NEXT:    %6 = load i32, i32* %arrayidx.4, align 4
+; EPILOG-NEXT:    %add.4 = add nsw i32 %6, %add.3
 ; EPILOG-NEXT:    %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1
 ; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
 ; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.5
@@ -1359,8 +1311,8 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.5, label %for.exit2.loopexit, label %latch.5
 ; EPILOG:       latch.5:
 ; EPILOG-NEXT:    %arrayidx.5 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.4
-; EPILOG-NEXT:    %8 = load i32, i32* %arrayidx.5, align 4
-; EPILOG-NEXT:    %add.5 = add nsw i32 %8, %add.4
+; EPILOG-NEXT:    %7 = load i32, i32* %arrayidx.5, align 4
+; EPILOG-NEXT:    %add.5 = add nsw i32 %7, %add.4
 ; EPILOG-NEXT:    %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1
 ; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
 ; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.6
@@ -1369,8 +1321,8 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.6, label %for.exit2.loopexit, label %latch.6
 ; EPILOG:       latch.6:
 ; EPILOG-NEXT:    %arrayidx.6 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.5
-; EPILOG-NEXT:    %9 = load i32, i32* %arrayidx.6, align 4
-; EPILOG-NEXT:    %add.6 = add nsw i32 %9, %add.5
+; EPILOG-NEXT:    %8 = load i32, i32* %arrayidx.6, align 4
+; EPILOG-NEXT:    %add.6 = add nsw i32 %8, %add.5
 ; EPILOG-NEXT:    %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1
 ; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
 ; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.7
@@ -1379,12 +1331,60 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.7, label %for.exit2.loopexit, label %latch.7
 ; EPILOG:       latch.7:
 ; EPILOG-NEXT:    %arrayidx.7 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.6
-; EPILOG-NEXT:    %10 = load i32, i32* %arrayidx.7, align 4
-; EPILOG-NEXT:    %add.7 = add nsw i32 %10, %add.6
+; EPILOG-NEXT:    %9 = load i32, i32* %arrayidx.7, align 4
+; EPILOG-NEXT:    %add.7 = add nsw i32 %9, %add.6
 ; EPILOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv.next.6, 1
 ; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latchExit.unr-lcssa.loopexit, label %header
+; EPILOG:       latchExit.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %result.ph.ph = phi i32 [ %add.7, %latch.7 ]
+; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %latch.7 ]
+; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %latch.7 ]
+; EPILOG-NEXT:    br label %latchExit.unr-lcssa
+; EPILOG:       latchExit.unr-lcssa:
+; EPILOG-NEXT:    %result.ph = phi i32 [ undef, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
+; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    br label %header.epil
+; EPILOG:       header.epil:
+; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit2, label %for.exiting_block.epil
+; EPILOG:       for.exiting_block.epil:
+; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
+; EPILOG-NEXT:    br i1 %cmp.epil, label %for.exit2.loopexit4, label %latch.epil
+; EPILOG:       latch.epil:
+; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-NEXT:    %10 = load i32, i32* %arrayidx.epil, align 4
+; EPILOG-NEXT:    %add.epil = add nsw i32 %10, %sum.02.epil
+; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
+; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
+; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit2, !llvm.loop !4
+; EPILOG:       latchExit.epilog-lcssa.loopexit:
+; EPILOG-NEXT:    %result.ph1.ph = phi i32 [ 0, %header ], [ 0, %latch ], [ 0, %latch.1 ], [ 0, %latch.2 ], [ 0, %latch.3 ], [ 0, %latch.4 ], [ 0, %latch.5 ], [ 0, %latch.6 ]
+; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
+; EPILOG:       latchExit.epilog-lcssa.loopexit2:
+; EPILOG-NEXT:    %result.ph1.ph3 = phi i32 [ 0, %header.epil ], [ %add.epil, %latch.epil ]
+; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
+; EPILOG:       latchExit.epilog-lcssa:
+; EPILOG-NEXT:    %result.ph1 = phi i32 [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %result.ph1.ph3, %latchExit.epilog-lcssa.loopexit2 ]
+; EPILOG-NEXT:    br label %latchExit
+; EPILOG:       latchExit:
+; EPILOG-NEXT:    %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
+; EPILOG-NEXT:    ret i32 %result
+; EPILOG:       for.exit2.loopexit:
+; EPILOG-NEXT:    br label %for.exit2
+; EPILOG:       for.exit2.loopexit4:
+; EPILOG-NEXT:    br label %for.exit2
+; EPILOG:       for.exit2:
+; EPILOG-NEXT:    ret i32 42
 ;
 ; EPILOG-BLOCK-LABEL: @hdr_latch_same_exit(
 ; EPILOG-BLOCK-NEXT:  entry:
@@ -1410,14 +1410,25 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 ; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.1
-; EPILOG-BLOCK:       latchExit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %sum.02.unr.ph = phi i32 [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    br label %latchExit.unr-lcssa
-; EPILOG-BLOCK:       latchExit.unr-lcssa:
-; EPILOG-BLOCK-NEXT:    %result.ph = phi i32 [ undef, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-BLOCK:       for.exiting_block.1:
+; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
+; EPILOG-BLOCK-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1
+; EPILOG-BLOCK:       latch.1:
+; EPILOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
+; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
+; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %3, %add
+; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !4
+; EPILOG-BLOCK:       latchExit.unr-lcssa.loopexit:
+; EPILOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ %add.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %sum.02.unr.ph = phi i32 [ %add.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    br label %latchExit.unr-lcssa
+; EPILOG-BLOCK:       latchExit.unr-lcssa:
+; EPILOG-BLOCK-NEXT:    %result.ph = phi i32 [ undef, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
 ; EPILOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
@@ -1430,8 +1441,8 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %for.exit2, label %latch.epil
 ; EPILOG-BLOCK:       latch.epil:
 ; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
-; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %3, %sum.02.unr
+; EPILOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.epil, align 4
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %4, %sum.02.unr
 ; EPILOG-BLOCK-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG-BLOCK:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph1.ph = phi i32 [ 0, %header ], [ 0, %latch ]
@@ -1446,17 +1457,6 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br label %for.exit2
 ; EPILOG-BLOCK:       for.exit2:
 ; EPILOG-BLOCK-NEXT:    ret i32 42
-; EPILOG-BLOCK:       for.exiting_block.1:
-; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
-; EPILOG-BLOCK-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1
-; EPILOG-BLOCK:       latch.1:
-; EPILOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-; EPILOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
-; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %4, %add
-; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !4
 ;
 ; PROLOG-LABEL: @hdr_latch_same_exit(
 ; PROLOG-NEXT:  entry:
@@ -1509,24 +1509,6 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-NEXT:    %add = add nsw i32 %3, %sum.02
 ; PROLOG-NEXT:    %indvars.iv.next = add i64 %indvars.iv, 1
 ; PROLOG-NEXT:    br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.1
-; PROLOG:       latchExit.unr-lcssa.loopexit:
-; PROLOG-NEXT:    %result.ph.ph = phi i32 [ 0, %header ], [ 0, %latch ], [ 0, %latch.1 ], [ 0, %latch.2 ], [ 0, %latch.3 ], [ 0, %latch.4 ], [ 0, %latch.5 ], [ 0, %latch.6 ], [ %add.7, %latch.7 ]
-; PROLOG-NEXT:    br label %latchExit.unr-lcssa
-; PROLOG:       latchExit.unr-lcssa.loopexit1:
-; PROLOG-NEXT:    %result.ph.ph2 = phi i32 [ 0, %header.prol ]
-; PROLOG-NEXT:    br label %latchExit.unr-lcssa
-; PROLOG:       latchExit.unr-lcssa:
-; PROLOG-NEXT:    %result.ph = phi i32 [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ], [ %result.ph.ph2, %latchExit.unr-lcssa.loopexit1 ]
-; PROLOG-NEXT:    br label %latchExit
-; PROLOG:       latchExit:
-; PROLOG-NEXT:    %result = phi i32 [ %result.unr, %header.prol.loopexit ], [ %result.ph, %latchExit.unr-lcssa ]
-; PROLOG-NEXT:    ret i32 %result
-; PROLOG:       for.exit2.loopexit:
-; PROLOG-NEXT:    br label %for.exit2
-; PROLOG:       for.exit2.loopexit3:
-; PROLOG-NEXT:    br label %for.exit2
-; PROLOG:       for.exit2:
-; PROLOG-NEXT:    ret i32 42
 ; PROLOG:       for.exiting_block.1:
 ; PROLOG-NEXT:    %cmp.1 = icmp eq i64 %n, 42
 ; PROLOG-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1
@@ -1591,6 +1573,24 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv.next.6, 1
 ; PROLOG-NEXT:    %exitcond.7 = icmp eq i64 %indvars.iv.next.7, %n
 ; PROLOG-NEXT:    br i1 %exitcond.7, label %latchExit.unr-lcssa.loopexit, label %header
+; PROLOG:       latchExit.unr-lcssa.loopexit:
+; PROLOG-NEXT:    %result.ph.ph = phi i32 [ 0, %header ], [ 0, %latch ], [ 0, %latch.1 ], [ 0, %latch.2 ], [ 0, %latch.3 ], [ 0, %latch.4 ], [ 0, %latch.5 ], [ 0, %latch.6 ], [ %add.7, %latch.7 ]
+; PROLOG-NEXT:    br label %latchExit.unr-lcssa
+; PROLOG:       latchExit.unr-lcssa.loopexit1:
+; PROLOG-NEXT:    %result.ph.ph2 = phi i32 [ 0, %header.prol ]
+; PROLOG-NEXT:    br label %latchExit.unr-lcssa
+; PROLOG:       latchExit.unr-lcssa:
+; PROLOG-NEXT:    %result.ph = phi i32 [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ], [ %result.ph.ph2, %latchExit.unr-lcssa.loopexit1 ]
+; PROLOG-NEXT:    br label %latchExit
+; PROLOG:       latchExit:
+; PROLOG-NEXT:    %result = phi i32 [ %result.unr, %header.prol.loopexit ], [ %result.ph, %latchExit.unr-lcssa ]
+; PROLOG-NEXT:    ret i32 %result
+; PROLOG:       for.exit2.loopexit:
+; PROLOG-NEXT:    br label %for.exit2
+; PROLOG:       for.exit2.loopexit3:
+; PROLOG-NEXT:    br label %for.exit2
+; PROLOG:       for.exit2:
+; PROLOG-NEXT:    ret i32 42
 ;
 ; PROLOG-BLOCK-LABEL: @hdr_latch_same_exit(
 ; PROLOG-BLOCK-NEXT:  entry:
@@ -1629,6 +1629,16 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    %add = add nsw i32 %3, %sum.02
 ; PROLOG-BLOCK-NEXT:    %indvars.iv.next = add i64 %indvars.iv, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %cond, label %latchExit.unr-lcssa.loopexit, label %for.exiting_block.1
+; PROLOG-BLOCK:       for.exiting_block.1:
+; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
+; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1
+; PROLOG-BLOCK:       latch.1:
+; PROLOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
+; PROLOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
+; PROLOG-BLOCK-NEXT:    %add.1 = add nsw i32 %4, %add
+; PROLOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
+; PROLOG-BLOCK-NEXT:    %exitcond.1 = icmp eq i64 %indvars.iv.next.1, %n
+; PROLOG-BLOCK-NEXT:    br i1 %exitcond.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !4
 ; PROLOG-BLOCK:       latchExit.unr-lcssa.loopexit:
 ; PROLOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ 0, %header ], [ 0, %latch ], [ %add.1, %latch.1 ]
 ; PROLOG-BLOCK-NEXT:    br label %latchExit.unr-lcssa
@@ -1642,16 +1652,6 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    br label %for.exit2
 ; PROLOG-BLOCK:       for.exit2:
 ; PROLOG-BLOCK-NEXT:    ret i32 42
-; PROLOG-BLOCK:       for.exiting_block.1:
-; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
-; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1
-; PROLOG-BLOCK:       latch.1:
-; PROLOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-; PROLOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
-; PROLOG-BLOCK-NEXT:    %add.1 = add nsw i32 %4, %add
-; PROLOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; PROLOG-BLOCK-NEXT:    %exitcond.1 = icmp eq i64 %indvars.iv.next.1, %n
-; PROLOG-BLOCK-NEXT:    br i1 %exitcond.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !4
 ;
 
 entry:
@@ -1710,61 +1710,13 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 ; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
-; EPILOG:       latchExit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %result.ph.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %latch.7 ]
-; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    br label %latchExit.unr-lcssa
-; EPILOG:       latchExit.unr-lcssa:
-; EPILOG-NEXT:    %result.ph = phi i32 [ undef, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
-; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
-; EPILOG:       header.epil.preheader:
-; EPILOG-NEXT:    br label %header.epil
-; EPILOG:       header.epil:
-; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
-; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit2, label %for.exiting_block.epil
-; EPILOG:       for.exiting_block.epil:
-; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
-; EPILOG-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa.loopexit3, label %latch.epil
-; EPILOG:       latch.epil:
-; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
-; EPILOG-NEXT:    %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-NEXT:    %add.epil = add nsw i32 %3, %sum.02.epil
-; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit3, !llvm.loop !5
-; EPILOG:       latchExit.epilog-lcssa.loopexit:
-; EPILOG-NEXT:    %result.ph1.ph = phi i32 [ 2, %for.exiting_block ], [ 2, %for.exiting_block.1 ], [ 2, %for.exiting_block.2 ], [ 2, %for.exiting_block.3 ], [ 2, %for.exiting_block.4 ], [ 2, %for.exiting_block.5 ], [ 2, %for.exiting_block.6 ], [ 2, %for.exiting_block.7 ]
-; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
-; EPILOG:       latchExit.epilog-lcssa.loopexit3:
-; EPILOG-NEXT:    %result.ph1.ph4 = phi i32 [ 2, %for.exiting_block.epil ], [ %add.epil, %latch.epil ]
-; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
-; EPILOG:       latchExit.epilog-lcssa:
-; EPILOG-NEXT:    %result.ph1 = phi i32 [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %result.ph1.ph4, %latchExit.epilog-lcssa.loopexit3 ]
-; EPILOG-NEXT:    br label %latchExit
-; EPILOG:       latchExit:
-; EPILOG-NEXT:    %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
-; EPILOG-NEXT:    ret i32 %result
-; EPILOG:       for.exit2.loopexit:
-; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2.loopexit2:
-; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2:
-; EPILOG-NEXT:    ret i32 42
 ; EPILOG:       for.exiting_block.1:
 ; EPILOG-NEXT:    %cmp.1 = icmp eq i64 %n, 42
 ; EPILOG-NEXT:    br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1
 ; EPILOG:       latch.1:
 ; EPILOG-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-; EPILOG-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
-; EPILOG-NEXT:    %add.1 = add nsw i32 %4, %add
+; EPILOG-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
+; EPILOG-NEXT:    %add.1 = add nsw i32 %3, %add
 ; EPILOG-NEXT:    %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
 ; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2
@@ -1773,8 +1725,8 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.2, label %latchExit.epilog-lcssa.loopexit, label %latch.2
 ; EPILOG:       latch.2:
 ; EPILOG-NEXT:    %arrayidx.2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.1
-; EPILOG-NEXT:    %5 = load i32, i32* %arrayidx.2, align 4
-; EPILOG-NEXT:    %add.2 = add nsw i32 %5, %add.1
+; EPILOG-NEXT:    %4 = load i32, i32* %arrayidx.2, align 4
+; EPILOG-NEXT:    %add.2 = add nsw i32 %4, %add.1
 ; EPILOG-NEXT:    %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1
 ; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3
@@ -1783,8 +1735,8 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.3, label %latchExit.epilog-lcssa.loopexit, label %latch.3
 ; EPILOG:       latch.3:
 ; EPILOG-NEXT:    %arrayidx.3 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.2
-; EPILOG-NEXT:    %6 = load i32, i32* %arrayidx.3, align 4
-; EPILOG-NEXT:    %add.3 = add nsw i32 %6, %add.2
+; EPILOG-NEXT:    %5 = load i32, i32* %arrayidx.3, align 4
+; EPILOG-NEXT:    %add.3 = add nsw i32 %5, %add.2
 ; EPILOG-NEXT:    %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1
 ; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4
@@ -1793,8 +1745,8 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.4, label %latchExit.epilog-lcssa.loopexit, label %latch.4
 ; EPILOG:       latch.4:
 ; EPILOG-NEXT:    %arrayidx.4 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.3
-; EPILOG-NEXT:    %7 = load i32, i32* %arrayidx.4, align 4
-; EPILOG-NEXT:    %add.4 = add nsw i32 %7, %add.3
+; EPILOG-NEXT:    %6 = load i32, i32* %arrayidx.4, align 4
+; EPILOG-NEXT:    %add.4 = add nsw i32 %6, %add.3
 ; EPILOG-NEXT:    %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1
 ; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5
@@ -1803,8 +1755,8 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.5, label %latchExit.epilog-lcssa.loopexit, label %latch.5
 ; EPILOG:       latch.5:
 ; EPILOG-NEXT:    %arrayidx.5 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.4
-; EPILOG-NEXT:    %8 = load i32, i32* %arrayidx.5, align 4
-; EPILOG-NEXT:    %add.5 = add nsw i32 %8, %add.4
+; EPILOG-NEXT:    %7 = load i32, i32* %arrayidx.5, align 4
+; EPILOG-NEXT:    %add.5 = add nsw i32 %7, %add.4
 ; EPILOG-NEXT:    %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1
 ; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6
@@ -1813,8 +1765,8 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.6, label %latchExit.epilog-lcssa.loopexit, label %latch.6
 ; EPILOG:       latch.6:
 ; EPILOG-NEXT:    %arrayidx.6 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.5
-; EPILOG-NEXT:    %9 = load i32, i32* %arrayidx.6, align 4
-; EPILOG-NEXT:    %add.6 = add nsw i32 %9, %add.5
+; EPILOG-NEXT:    %8 = load i32, i32* %arrayidx.6, align 4
+; EPILOG-NEXT:    %add.6 = add nsw i32 %8, %add.5
 ; EPILOG-NEXT:    %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1
 ; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.7
@@ -1823,12 +1775,60 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.7, label %latchExit.epilog-lcssa.loopexit, label %latch.7
 ; EPILOG:       latch.7:
 ; EPILOG-NEXT:    %arrayidx.7 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.6
-; EPILOG-NEXT:    %10 = load i32, i32* %arrayidx.7, align 4
-; EPILOG-NEXT:    %add.7 = add nsw i32 %10, %add.6
+; EPILOG-NEXT:    %9 = load i32, i32* %arrayidx.7, align 4
+; EPILOG-NEXT:    %add.7 = add nsw i32 %9, %add.6
 ; EPILOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv.next.6, 1
 ; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latchExit.unr-lcssa.loopexit, label %header
+; EPILOG:       latchExit.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %result.ph.ph = phi i32 [ %add.7, %latch.7 ]
+; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %latch.7 ]
+; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %latch.7 ]
+; EPILOG-NEXT:    br label %latchExit.unr-lcssa
+; EPILOG:       latchExit.unr-lcssa:
+; EPILOG-NEXT:    %result.ph = phi i32 [ undef, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
+; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    br label %header.epil
+; EPILOG:       header.epil:
+; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit2, label %for.exiting_block.epil
+; EPILOG:       for.exiting_block.epil:
+; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
+; EPILOG-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa.loopexit3, label %latch.epil
+; EPILOG:       latch.epil:
+; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-NEXT:    %10 = load i32, i32* %arrayidx.epil, align 4
+; EPILOG-NEXT:    %add.epil = add nsw i32 %10, %sum.02.epil
+; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
+; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
+; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit3, !llvm.loop !5
+; EPILOG:       latchExit.epilog-lcssa.loopexit:
+; EPILOG-NEXT:    %result.ph1.ph = phi i32 [ 2, %for.exiting_block ], [ 2, %for.exiting_block.1 ], [ 2, %for.exiting_block.2 ], [ 2, %for.exiting_block.3 ], [ 2, %for.exiting_block.4 ], [ 2, %for.exiting_block.5 ], [ 2, %for.exiting_block.6 ], [ 2, %for.exiting_block.7 ]
+; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
+; EPILOG:       latchExit.epilog-lcssa.loopexit3:
+; EPILOG-NEXT:    %result.ph1.ph4 = phi i32 [ 2, %for.exiting_block.epil ], [ %add.epil, %latch.epil ]
+; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
+; EPILOG:       latchExit.epilog-lcssa:
+; EPILOG-NEXT:    %result.ph1 = phi i32 [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %result.ph1.ph4, %latchExit.epilog-lcssa.loopexit3 ]
+; EPILOG-NEXT:    br label %latchExit
+; EPILOG:       latchExit:
+; EPILOG-NEXT:    %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
+; EPILOG-NEXT:    ret i32 %result
+; EPILOG:       for.exit2.loopexit:
+; EPILOG-NEXT:    br label %for.exit2
+; EPILOG:       for.exit2.loopexit2:
+; EPILOG-NEXT:    br label %for.exit2
+; EPILOG:       for.exit2:
+; EPILOG-NEXT:    ret i32 42
 ;
 ; EPILOG-BLOCK-LABEL: @otherblock_latch_same_exit(
 ; EPILOG-BLOCK-NEXT:  entry:
@@ -1854,6 +1854,17 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 ; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
+; EPILOG-BLOCK:       for.exiting_block.1:
+; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
+; EPILOG-BLOCK-NEXT:    br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1
+; EPILOG-BLOCK:       latch.1:
+; EPILOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
+; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
+; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %3, %add
+; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !5
 ; EPILOG-BLOCK:       latchExit.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ %add.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.1, %latch.1 ]
@@ -1874,8 +1885,8 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa, label %latch.epil
 ; EPILOG-BLOCK:       latch.epil:
 ; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
-; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %3, %sum.02.unr
+; EPILOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.epil, align 4
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %4, %sum.02.unr
 ; EPILOG-BLOCK-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG-BLOCK:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph1.ph = phi i32 [ 2, %for.exiting_block ], [ 2, %for.exiting_block.1 ]
@@ -1890,17 +1901,6 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br label %for.exit2
 ; EPILOG-BLOCK:       for.exit2:
 ; EPILOG-BLOCK-NEXT:    ret i32 42
-; EPILOG-BLOCK:       for.exiting_block.1:
-; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
-; EPILOG-BLOCK-NEXT:    br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1
-; EPILOG-BLOCK:       latch.1:
-; EPILOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-; EPILOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
-; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %4, %add
-; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !5
 ;
 ; PROLOG-LABEL: @otherblock_latch_same_exit(
 ; PROLOG-NEXT:  entry:
@@ -1953,24 +1953,6 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-NEXT:    %add = add nsw i32 %3, %sum.02
 ; PROLOG-NEXT:    %indvars.iv.next = add i64 %indvars.iv, 1
 ; PROLOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
-; PROLOG:       latchExit.unr-lcssa.loopexit:
-; PROLOG-NEXT:    %result.ph.ph = phi i32 [ 2, %for.exiting_block ], [ 2, %for.exiting_block.1 ], [ 2, %for.exiting_block.2 ], [ 2, %for.exiting_block.3 ], [ 2, %for.exiting_block.4 ], [ 2, %for.exiting_block.5 ], [ 2, %for.exiting_block.6 ], [ 2, %for.exiting_block.7 ], [ %add.7, %latch.7 ]
-; PROLOG-NEXT:    br label %latchExit.unr-lcssa
-; PROLOG:       latchExit.unr-lcssa.loopexit2:
-; PROLOG-NEXT:    %result.ph.ph3 = phi i32 [ 2, %for.exiting_block.prol ]
-; PROLOG-NEXT:    br label %latchExit.unr-lcssa
-; PROLOG:       latchExit.unr-lcssa:
-; PROLOG-NEXT:    %result.ph = phi i32 [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ], [ %result.ph.ph3, %latchExit.unr-lcssa.loopexit2 ]
-; PROLOG-NEXT:    br label %latchExit
-; PROLOG:       latchExit:
-; PROLOG-NEXT:    %result = phi i32 [ %result.unr, %header.prol.loopexit ], [ %result.ph, %latchExit.unr-lcssa ]
-; PROLOG-NEXT:    ret i32 %result
-; PROLOG:       for.exit2.loopexit:
-; PROLOG-NEXT:    br label %for.exit2
-; PROLOG:       for.exit2.loopexit1:
-; PROLOG-NEXT:    br label %for.exit2
-; PROLOG:       for.exit2:
-; PROLOG-NEXT:    ret i32 42
 ; PROLOG:       for.exiting_block.1:
 ; PROLOG-NEXT:    %cmp.1 = icmp eq i64 %n, 42
 ; PROLOG-NEXT:    br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1
@@ -2035,6 +2017,24 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv.next.6, 1
 ; PROLOG-NEXT:    %exitcond.7 = icmp eq i64 %indvars.iv.next.7, %n
 ; PROLOG-NEXT:    br i1 %exitcond.7, label %latchExit.unr-lcssa.loopexit, label %header
+; PROLOG:       latchExit.unr-lcssa.loopexit:
+; PROLOG-NEXT:    %result.ph.ph = phi i32 [ 2, %for.exiting_block ], [ 2, %for.exiting_block.1 ], [ 2, %for.exiting_block.2 ], [ 2, %for.exiting_block.3 ], [ 2, %for.exiting_block.4 ], [ 2, %for.exiting_block.5 ], [ 2, %for.exiting_block.6 ], [ 2, %for.exiting_block.7 ], [ %add.7, %latch.7 ]
+; PROLOG-NEXT:    br label %latchExit.unr-lcssa
+; PROLOG:       latchExit.unr-lcssa.loopexit2:
+; PROLOG-NEXT:    %result.ph.ph3 = phi i32 [ 2, %for.exiting_block.prol ]
+; PROLOG-NEXT:    br label %latchExit.unr-lcssa
+; PROLOG:       latchExit.unr-lcssa:
+; PROLOG-NEXT:    %result.ph = phi i32 [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ], [ %result.ph.ph3, %latchExit.unr-lcssa.loopexit2 ]
+; PROLOG-NEXT:    br label %latchExit
+; PROLOG:       latchExit:
+; PROLOG-NEXT:    %result = phi i32 [ %result.unr, %header.prol.loopexit ], [ %result.ph, %latchExit.unr-lcssa ]
+; PROLOG-NEXT:    ret i32 %result
+; PROLOG:       for.exit2.loopexit:
+; PROLOG-NEXT:    br label %for.exit2
+; PROLOG:       for.exit2.loopexit1:
+; PROLOG-NEXT:    br label %for.exit2
+; PROLOG:       for.exit2:
+; PROLOG-NEXT:    ret i32 42
 ;
 ; PROLOG-BLOCK-LABEL: @otherblock_latch_same_exit(
 ; PROLOG-BLOCK-NEXT:  entry:
@@ -2073,6 +2073,16 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    %add = add nsw i32 %3, %sum.02
 ; PROLOG-BLOCK-NEXT:    %indvars.iv.next = add i64 %indvars.iv, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
+; PROLOG-BLOCK:       for.exiting_block.1:
+; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
+; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1
+; PROLOG-BLOCK:       latch.1:
+; PROLOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
+; PROLOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
+; PROLOG-BLOCK-NEXT:    %add.1 = add nsw i32 %4, %add
+; PROLOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
+; PROLOG-BLOCK-NEXT:    %exitcond.1 = icmp eq i64 %indvars.iv.next.1, %n
+; PROLOG-BLOCK-NEXT:    br i1 %exitcond.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !5
 ; PROLOG-BLOCK:       latchExit.unr-lcssa.loopexit:
 ; PROLOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ 2, %for.exiting_block ], [ 2, %for.exiting_block.1 ], [ %add.1, %latch.1 ]
 ; PROLOG-BLOCK-NEXT:    br label %latchExit.unr-lcssa
@@ -2086,16 +2096,6 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    br label %for.exit2
 ; PROLOG-BLOCK:       for.exit2:
 ; PROLOG-BLOCK-NEXT:    ret i32 42
-; PROLOG-BLOCK:       for.exiting_block.1:
-; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
-; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1
-; PROLOG-BLOCK:       latch.1:
-; PROLOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-; PROLOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
-; PROLOG-BLOCK-NEXT:    %add.1 = add nsw i32 %4, %add
-; PROLOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; PROLOG-BLOCK-NEXT:    %exitcond.1 = icmp eq i64 %indvars.iv.next.1, %n
-; PROLOG-BLOCK-NEXT:    br i1 %exitcond.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !5
 ;
 
 entry:
@@ -2155,61 +2155,13 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 ; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
-; EPILOG:       latchExit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %result.ph.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %latch.7 ]
-; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    br label %latchExit.unr-lcssa
-; EPILOG:       latchExit.unr-lcssa:
-; EPILOG-NEXT:    %result.ph = phi i32 [ undef, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
-; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
-; EPILOG:       header.epil.preheader:
-; EPILOG-NEXT:    br label %header.epil
-; EPILOG:       header.epil:
-; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
-; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit2, label %for.exiting_block.epil
-; EPILOG:       for.exiting_block.epil:
-; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
-; EPILOG-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa.loopexit3, label %latch.epil
-; EPILOG:       latch.epil:
-; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
-; EPILOG-NEXT:    %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-NEXT:    %add.epil = add nsw i32 %3, %sum.02.epil
-; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit3, !llvm.loop !6
-; EPILOG:       latchExit.epilog-lcssa.loopexit:
-; EPILOG-NEXT:    %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %for.exiting_block.2 ], [ %add.2, %for.exiting_block.3 ], [ %add.3, %for.exiting_block.4 ], [ %add.4, %for.exiting_block.5 ], [ %add.5, %for.exiting_block.6 ], [ %add.6, %for.exiting_block.7 ]
-; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
-; EPILOG:       latchExit.epilog-lcssa.loopexit3:
-; EPILOG-NEXT:    %result.ph1.ph4 = phi i32 [ %sum.02.epil, %for.exiting_block.epil ], [ %add.epil, %latch.epil ]
-; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
-; EPILOG:       latchExit.epilog-lcssa:
-; EPILOG-NEXT:    %result.ph1 = phi i32 [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %result.ph1.ph4, %latchExit.epilog-lcssa.loopexit3 ]
-; EPILOG-NEXT:    br label %latchExit
-; EPILOG:       latchExit:
-; EPILOG-NEXT:    %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
-; EPILOG-NEXT:    ret i32 %result
-; EPILOG:       for.exit2.loopexit:
-; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2.loopexit2:
-; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2:
-; EPILOG-NEXT:    ret i32 42
 ; EPILOG:       for.exiting_block.1:
 ; EPILOG-NEXT:    %cmp.1 = icmp eq i64 %n, 42
 ; EPILOG-NEXT:    br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1
 ; EPILOG:       latch.1:
 ; EPILOG-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-; EPILOG-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
-; EPILOG-NEXT:    %add.1 = add nsw i32 %4, %add
+; EPILOG-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
+; EPILOG-NEXT:    %add.1 = add nsw i32 %3, %add
 ; EPILOG-NEXT:    %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
 ; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2
@@ -2218,8 +2170,8 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.2, label %latchExit.epilog-lcssa.loopexit, label %latch.2
 ; EPILOG:       latch.2:
 ; EPILOG-NEXT:    %arrayidx.2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.1
-; EPILOG-NEXT:    %5 = load i32, i32* %arrayidx.2, align 4
-; EPILOG-NEXT:    %add.2 = add nsw i32 %5, %add.1
+; EPILOG-NEXT:    %4 = load i32, i32* %arrayidx.2, align 4
+; EPILOG-NEXT:    %add.2 = add nsw i32 %4, %add.1
 ; EPILOG-NEXT:    %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1
 ; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3
@@ -2228,8 +2180,8 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.3, label %latchExit.epilog-lcssa.loopexit, label %latch.3
 ; EPILOG:       latch.3:
 ; EPILOG-NEXT:    %arrayidx.3 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.2
-; EPILOG-NEXT:    %6 = load i32, i32* %arrayidx.3, align 4
-; EPILOG-NEXT:    %add.3 = add nsw i32 %6, %add.2
+; EPILOG-NEXT:    %5 = load i32, i32* %arrayidx.3, align 4
+; EPILOG-NEXT:    %add.3 = add nsw i32 %5, %add.2
 ; EPILOG-NEXT:    %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1
 ; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4
@@ -2238,8 +2190,8 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.4, label %latchExit.epilog-lcssa.loopexit, label %latch.4
 ; EPILOG:       latch.4:
 ; EPILOG-NEXT:    %arrayidx.4 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.3
-; EPILOG-NEXT:    %7 = load i32, i32* %arrayidx.4, align 4
-; EPILOG-NEXT:    %add.4 = add nsw i32 %7, %add.3
+; EPILOG-NEXT:    %6 = load i32, i32* %arrayidx.4, align 4
+; EPILOG-NEXT:    %add.4 = add nsw i32 %6, %add.3
 ; EPILOG-NEXT:    %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1
 ; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5
@@ -2248,8 +2200,8 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.5, label %latchExit.epilog-lcssa.loopexit, label %latch.5
 ; EPILOG:       latch.5:
 ; EPILOG-NEXT:    %arrayidx.5 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.4
-; EPILOG-NEXT:    %8 = load i32, i32* %arrayidx.5, align 4
-; EPILOG-NEXT:    %add.5 = add nsw i32 %8, %add.4
+; EPILOG-NEXT:    %7 = load i32, i32* %arrayidx.5, align 4
+; EPILOG-NEXT:    %add.5 = add nsw i32 %7, %add.4
 ; EPILOG-NEXT:    %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1
 ; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6
@@ -2258,8 +2210,8 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.6, label %latchExit.epilog-lcssa.loopexit, label %latch.6
 ; EPILOG:       latch.6:
 ; EPILOG-NEXT:    %arrayidx.6 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.5
-; EPILOG-NEXT:    %9 = load i32, i32* %arrayidx.6, align 4
-; EPILOG-NEXT:    %add.6 = add nsw i32 %9, %add.5
+; EPILOG-NEXT:    %8 = load i32, i32* %arrayidx.6, align 4
+; EPILOG-NEXT:    %add.6 = add nsw i32 %8, %add.5
 ; EPILOG-NEXT:    %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1
 ; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.7
@@ -2268,12 +2220,60 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.7, label %latchExit.epilog-lcssa.loopexit, label %latch.7
 ; EPILOG:       latch.7:
 ; EPILOG-NEXT:    %arrayidx.7 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.6
-; EPILOG-NEXT:    %10 = load i32, i32* %arrayidx.7, align 4
-; EPILOG-NEXT:    %add.7 = add nsw i32 %10, %add.6
+; EPILOG-NEXT:    %9 = load i32, i32* %arrayidx.7, align 4
+; EPILOG-NEXT:    %add.7 = add nsw i32 %9, %add.6
 ; EPILOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv.next.6, 1
 ; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latchExit.unr-lcssa.loopexit, label %header
+; EPILOG:       latchExit.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %result.ph.ph = phi i32 [ %add.7, %latch.7 ]
+; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %latch.7 ]
+; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %latch.7 ]
+; EPILOG-NEXT:    br label %latchExit.unr-lcssa
+; EPILOG:       latchExit.unr-lcssa:
+; EPILOG-NEXT:    %result.ph = phi i32 [ undef, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
+; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    br label %header.epil
+; EPILOG:       header.epil:
+; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit2, label %for.exiting_block.epil
+; EPILOG:       for.exiting_block.epil:
+; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
+; EPILOG-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa.loopexit3, label %latch.epil
+; EPILOG:       latch.epil:
+; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-NEXT:    %10 = load i32, i32* %arrayidx.epil, align 4
+; EPILOG-NEXT:    %add.epil = add nsw i32 %10, %sum.02.epil
+; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
+; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
+; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit3, !llvm.loop !6
+; EPILOG:       latchExit.epilog-lcssa.loopexit:
+; EPILOG-NEXT:    %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %for.exiting_block.2 ], [ %add.2, %for.exiting_block.3 ], [ %add.3, %for.exiting_block.4 ], [ %add.4, %for.exiting_block.5 ], [ %add.5, %for.exiting_block.6 ], [ %add.6, %for.exiting_block.7 ]
+; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
+; EPILOG:       latchExit.epilog-lcssa.loopexit3:
+; EPILOG-NEXT:    %result.ph1.ph4 = phi i32 [ %sum.02.epil, %for.exiting_block.epil ], [ %add.epil, %latch.epil ]
+; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
+; EPILOG:       latchExit.epilog-lcssa:
+; EPILOG-NEXT:    %result.ph1 = phi i32 [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %result.ph1.ph4, %latchExit.epilog-lcssa.loopexit3 ]
+; EPILOG-NEXT:    br label %latchExit
+; EPILOG:       latchExit:
+; EPILOG-NEXT:    %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
+; EPILOG-NEXT:    ret i32 %result
+; EPILOG:       for.exit2.loopexit:
+; EPILOG-NEXT:    br label %for.exit2
+; EPILOG:       for.exit2.loopexit2:
+; EPILOG-NEXT:    br label %for.exit2
+; EPILOG:       for.exit2:
+; EPILOG-NEXT:    ret i32 42
 ;
 ; EPILOG-BLOCK-LABEL: @otherblock_latch_same_exit2(
 ; EPILOG-BLOCK-NEXT:  entry:
@@ -2299,6 +2299,17 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 ; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
+; EPILOG-BLOCK:       for.exiting_block.1:
+; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
+; EPILOG-BLOCK-NEXT:    br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1
+; EPILOG-BLOCK:       latch.1:
+; EPILOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
+; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
+; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %3, %add
+; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !6
 ; EPILOG-BLOCK:       latchExit.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ %add.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.1, %latch.1 ]
@@ -2319,8 +2330,8 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa, label %latch.epil
 ; EPILOG-BLOCK:       latch.epil:
 ; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
-; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %3, %sum.02.unr
+; EPILOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.epil, align 4
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %4, %sum.02.unr
 ; EPILOG-BLOCK-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG-BLOCK:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ]
@@ -2335,17 +2346,6 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br label %for.exit2
 ; EPILOG-BLOCK:       for.exit2:
 ; EPILOG-BLOCK-NEXT:    ret i32 42
-; EPILOG-BLOCK:       for.exiting_block.1:
-; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
-; EPILOG-BLOCK-NEXT:    br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1
-; EPILOG-BLOCK:       latch.1:
-; EPILOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-; EPILOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
-; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %4, %add
-; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !6
 ;
 ; PROLOG-LABEL: @otherblock_latch_same_exit2(
 ; PROLOG-NEXT:  entry:
@@ -2398,24 +2398,6 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-NEXT:    %add = add nsw i32 %3, %sum.02
 ; PROLOG-NEXT:    %indvars.iv.next = add i64 %indvars.iv, 1
 ; PROLOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
-; PROLOG:       latchExit.unr-lcssa.loopexit:
-; PROLOG-NEXT:    %result.ph.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %for.exiting_block.2 ], [ %add.2, %for.exiting_block.3 ], [ %add.3, %for.exiting_block.4 ], [ %add.4, %for.exiting_block.5 ], [ %add.5, %for.exiting_block.6 ], [ %add.6, %for.exiting_block.7 ], [ %add.7, %latch.7 ]
-; PROLOG-NEXT:    br label %latchExit.unr-lcssa
-; PROLOG:       latchExit.unr-lcssa.loopexit2:
-; PROLOG-NEXT:    %result.ph.ph3 = phi i32 [ %sum.02.prol, %for.exiting_block.prol ]
-; PROLOG-NEXT:    br label %latchExit.unr-lcssa
-; PROLOG:       latchExit.unr-lcssa:
-; PROLOG-NEXT:    %result.ph = phi i32 [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ], [ %result.ph.ph3, %latchExit.unr-lcssa.loopexit2 ]
-; PROLOG-NEXT:    br label %latchExit
-; PROLOG:       latchExit:
-; PROLOG-NEXT:    %result = phi i32 [ %result.unr, %header.prol.loopexit ], [ %result.ph, %latchExit.unr-lcssa ]
-; PROLOG-NEXT:    ret i32 %result
-; PROLOG:       for.exit2.loopexit:
-; PROLOG-NEXT:    br label %for.exit2
-; PROLOG:       for.exit2.loopexit1:
-; PROLOG-NEXT:    br label %for.exit2
-; PROLOG:       for.exit2:
-; PROLOG-NEXT:    ret i32 42
 ; PROLOG:       for.exiting_block.1:
 ; PROLOG-NEXT:    %cmp.1 = icmp eq i64 %n, 42
 ; PROLOG-NEXT:    br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1
@@ -2480,6 +2462,24 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv.next.6, 1
 ; PROLOG-NEXT:    %exitcond.7 = icmp eq i64 %indvars.iv.next.7, %n
 ; PROLOG-NEXT:    br i1 %exitcond.7, label %latchExit.unr-lcssa.loopexit, label %header
+; PROLOG:       latchExit.unr-lcssa.loopexit:
+; PROLOG-NEXT:    %result.ph.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %for.exiting_block.2 ], [ %add.2, %for.exiting_block.3 ], [ %add.3, %for.exiting_block.4 ], [ %add.4, %for.exiting_block.5 ], [ %add.5, %for.exiting_block.6 ], [ %add.6, %for.exiting_block.7 ], [ %add.7, %latch.7 ]
+; PROLOG-NEXT:    br label %latchExit.unr-lcssa
+; PROLOG:       latchExit.unr-lcssa.loopexit2:
+; PROLOG-NEXT:    %result.ph.ph3 = phi i32 [ %sum.02.prol, %for.exiting_block.prol ]
+; PROLOG-NEXT:    br label %latchExit.unr-lcssa
+; PROLOG:       latchExit.unr-lcssa:
+; PROLOG-NEXT:    %result.ph = phi i32 [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ], [ %result.ph.ph3, %latchExit.unr-lcssa.loopexit2 ]
+; PROLOG-NEXT:    br label %latchExit
+; PROLOG:       latchExit:
+; PROLOG-NEXT:    %result = phi i32 [ %result.unr, %header.prol.loopexit ], [ %result.ph, %latchExit.unr-lcssa ]
+; PROLOG-NEXT:    ret i32 %result
+; PROLOG:       for.exit2.loopexit:
+; PROLOG-NEXT:    br label %for.exit2
+; PROLOG:       for.exit2.loopexit1:
+; PROLOG-NEXT:    br label %for.exit2
+; PROLOG:       for.exit2:
+; PROLOG-NEXT:    ret i32 42
 ;
 ; PROLOG-BLOCK-LABEL: @otherblock_latch_same_exit2(
 ; PROLOG-BLOCK-NEXT:  entry:
@@ -2518,6 +2518,16 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    %add = add nsw i32 %3, %sum.02
 ; PROLOG-BLOCK-NEXT:    %indvars.iv.next = add i64 %indvars.iv, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
+; PROLOG-BLOCK:       for.exiting_block.1:
+; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
+; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1
+; PROLOG-BLOCK:       latch.1:
+; PROLOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
+; PROLOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
+; PROLOG-BLOCK-NEXT:    %add.1 = add nsw i32 %4, %add
+; PROLOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
+; PROLOG-BLOCK-NEXT:    %exitcond.1 = icmp eq i64 %indvars.iv.next.1, %n
+; PROLOG-BLOCK-NEXT:    br i1 %exitcond.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !6
 ; PROLOG-BLOCK:       latchExit.unr-lcssa.loopexit:
 ; PROLOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %latch.1 ]
 ; PROLOG-BLOCK-NEXT:    br label %latchExit.unr-lcssa
@@ -2531,16 +2541,6 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    br label %for.exit2
 ; PROLOG-BLOCK:       for.exit2:
 ; PROLOG-BLOCK-NEXT:    ret i32 42
-; PROLOG-BLOCK:       for.exiting_block.1:
-; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
-; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1
-; PROLOG-BLOCK:       latch.1:
-; PROLOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-; PROLOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
-; PROLOG-BLOCK-NEXT:    %add.1 = add nsw i32 %4, %add
-; PROLOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; PROLOG-BLOCK-NEXT:    %exitcond.1 = icmp eq i64 %indvars.iv.next.1, %n
-; PROLOG-BLOCK-NEXT:    br i1 %exitcond.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !6
 ;
 
 entry:
@@ -2601,58 +2601,10 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 ; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
-; EPILOG:       latchExit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %result.ph.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %latch.7 ]
-; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    br label %latchExit.unr-lcssa
-; EPILOG:       latchExit.unr-lcssa:
-; EPILOG-NEXT:    %result.ph = phi i32 [ undef, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
-; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
-; EPILOG:       header.epil.preheader:
-; EPILOG-NEXT:    br label %header.epil
-; EPILOG:       header.epil:
-; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
-; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit2, label %for.exiting_block.epil
-; EPILOG:       for.exiting_block.epil:
-; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
-; EPILOG-NEXT:    %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-NEXT:    %add.epil = add nsw i32 %3, %sum.02.epil
-; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
-; EPILOG-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa.loopexit3, label %latch.epil
-; EPILOG:       latch.epil:
-; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit3, !llvm.loop !7
-; EPILOG:       latchExit.epilog-lcssa.loopexit:
-; EPILOG-NEXT:    %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %for.exiting_block.2 ], [ %add.2, %for.exiting_block.3 ], [ %add.3, %for.exiting_block.4 ], [ %add.4, %for.exiting_block.5 ], [ %add.5, %for.exiting_block.6 ], [ %add.6, %for.exiting_block.7 ]
-; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
-; EPILOG:       latchExit.epilog-lcssa.loopexit3:
-; EPILOG-NEXT:    %result.ph1.ph4 = phi i32 [ %sum.02.epil, %for.exiting_block.epil ], [ %add.epil, %latch.epil ]
-; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
-; EPILOG:       latchExit.epilog-lcssa:
-; EPILOG-NEXT:    %result.ph1 = phi i32 [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %result.ph1.ph4, %latchExit.epilog-lcssa.loopexit3 ]
-; EPILOG-NEXT:    br label %latchExit
-; EPILOG:       latchExit:
-; EPILOG-NEXT:    %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
-; EPILOG-NEXT:    ret i32 %result
-; EPILOG:       for.exit2.loopexit:
-; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2.loopexit2:
-; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2:
-; EPILOG-NEXT:    ret i32 42
 ; EPILOG:       for.exiting_block.1:
 ; EPILOG-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-; EPILOG-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
-; EPILOG-NEXT:    %add.1 = add nsw i32 %4, %add
+; EPILOG-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
+; EPILOG-NEXT:    %add.1 = add nsw i32 %3, %add
 ; EPILOG-NEXT:    %cmp.1 = icmp eq i64 %n, 42
 ; EPILOG-NEXT:    br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1
 ; EPILOG:       latch.1:
@@ -2661,8 +2613,8 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2
 ; EPILOG:       for.exiting_block.2:
 ; EPILOG-NEXT:    %arrayidx.2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.1
-; EPILOG-NEXT:    %5 = load i32, i32* %arrayidx.2, align 4
-; EPILOG-NEXT:    %add.2 = add nsw i32 %5, %add.1
+; EPILOG-NEXT:    %4 = load i32, i32* %arrayidx.2, align 4
+; EPILOG-NEXT:    %add.2 = add nsw i32 %4, %add.1
 ; EPILOG-NEXT:    %cmp.2 = icmp eq i64 %n, 42
 ; EPILOG-NEXT:    br i1 %cmp.2, label %latchExit.epilog-lcssa.loopexit, label %latch.2
 ; EPILOG:       latch.2:
@@ -2671,8 +2623,8 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3
 ; EPILOG:       for.exiting_block.3:
 ; EPILOG-NEXT:    %arrayidx.3 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.2
-; EPILOG-NEXT:    %6 = load i32, i32* %arrayidx.3, align 4
-; EPILOG-NEXT:    %add.3 = add nsw i32 %6, %add.2
+; EPILOG-NEXT:    %5 = load i32, i32* %arrayidx.3, align 4
+; EPILOG-NEXT:    %add.3 = add nsw i32 %5, %add.2
 ; EPILOG-NEXT:    %cmp.3 = icmp eq i64 %n, 42
 ; EPILOG-NEXT:    br i1 %cmp.3, label %latchExit.epilog-lcssa.loopexit, label %latch.3
 ; EPILOG:       latch.3:
@@ -2681,8 +2633,8 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4
 ; EPILOG:       for.exiting_block.4:
 ; EPILOG-NEXT:    %arrayidx.4 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.3
-; EPILOG-NEXT:    %7 = load i32, i32* %arrayidx.4, align 4
-; EPILOG-NEXT:    %add.4 = add nsw i32 %7, %add.3
+; EPILOG-NEXT:    %6 = load i32, i32* %arrayidx.4, align 4
+; EPILOG-NEXT:    %add.4 = add nsw i32 %6, %add.3
 ; EPILOG-NEXT:    %cmp.4 = icmp eq i64 %n, 42
 ; EPILOG-NEXT:    br i1 %cmp.4, label %latchExit.epilog-lcssa.loopexit, label %latch.4
 ; EPILOG:       latch.4:
@@ -2691,8 +2643,8 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5
 ; EPILOG:       for.exiting_block.5:
 ; EPILOG-NEXT:    %arrayidx.5 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.4
-; EPILOG-NEXT:    %8 = load i32, i32* %arrayidx.5, align 4
-; EPILOG-NEXT:    %add.5 = add nsw i32 %8, %add.4
+; EPILOG-NEXT:    %7 = load i32, i32* %arrayidx.5, align 4
+; EPILOG-NEXT:    %add.5 = add nsw i32 %7, %add.4
 ; EPILOG-NEXT:    %cmp.5 = icmp eq i64 %n, 42
 ; EPILOG-NEXT:    br i1 %cmp.5, label %latchExit.epilog-lcssa.loopexit, label %latch.5
 ; EPILOG:       latch.5:
@@ -2701,8 +2653,8 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6
 ; EPILOG:       for.exiting_block.6:
 ; EPILOG-NEXT:    %arrayidx.6 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.5
-; EPILOG-NEXT:    %9 = load i32, i32* %arrayidx.6, align 4
-; EPILOG-NEXT:    %add.6 = add nsw i32 %9, %add.5
+; EPILOG-NEXT:    %8 = load i32, i32* %arrayidx.6, align 4
+; EPILOG-NEXT:    %add.6 = add nsw i32 %8, %add.5
 ; EPILOG-NEXT:    %cmp.6 = icmp eq i64 %n, 42
 ; EPILOG-NEXT:    br i1 %cmp.6, label %latchExit.epilog-lcssa.loopexit, label %latch.6
 ; EPILOG:       latch.6:
@@ -2711,8 +2663,8 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.7
 ; EPILOG:       for.exiting_block.7:
 ; EPILOG-NEXT:    %arrayidx.7 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.6
-; EPILOG-NEXT:    %10 = load i32, i32* %arrayidx.7, align 4
-; EPILOG-NEXT:    %add.7 = add nsw i32 %10, %add.6
+; EPILOG-NEXT:    %9 = load i32, i32* %arrayidx.7, align 4
+; EPILOG-NEXT:    %add.7 = add nsw i32 %9, %add.6
 ; EPILOG-NEXT:    %cmp.7 = icmp eq i64 %n, 42
 ; EPILOG-NEXT:    br i1 %cmp.7, label %latchExit.epilog-lcssa.loopexit, label %latch.7
 ; EPILOG:       latch.7:
@@ -2720,6 +2672,54 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latchExit.unr-lcssa.loopexit, label %header
+; EPILOG:       latchExit.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %result.ph.ph = phi i32 [ %add.7, %latch.7 ]
+; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %latch.7 ]
+; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %latch.7 ]
+; EPILOG-NEXT:    br label %latchExit.unr-lcssa
+; EPILOG:       latchExit.unr-lcssa:
+; EPILOG-NEXT:    %result.ph = phi i32 [ undef, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
+; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    br label %header.epil
+; EPILOG:       header.epil:
+; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit2, label %for.exiting_block.epil
+; EPILOG:       for.exiting_block.epil:
+; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-NEXT:    %10 = load i32, i32* %arrayidx.epil, align 4
+; EPILOG-NEXT:    %add.epil = add nsw i32 %10, %sum.02.epil
+; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
+; EPILOG-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa.loopexit3, label %latch.epil
+; EPILOG:       latch.epil:
+; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
+; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
+; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit3, !llvm.loop !7
+; EPILOG:       latchExit.epilog-lcssa.loopexit:
+; EPILOG-NEXT:    %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %for.exiting_block.2 ], [ %add.2, %for.exiting_block.3 ], [ %add.3, %for.exiting_block.4 ], [ %add.4, %for.exiting_block.5 ], [ %add.5, %for.exiting_block.6 ], [ %add.6, %for.exiting_block.7 ]
+; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
+; EPILOG:       latchExit.epilog-lcssa.loopexit3:
+; EPILOG-NEXT:    %result.ph1.ph4 = phi i32 [ %sum.02.epil, %for.exiting_block.epil ], [ %add.epil, %latch.epil ]
+; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
+; EPILOG:       latchExit.epilog-lcssa:
+; EPILOG-NEXT:    %result.ph1 = phi i32 [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %result.ph1.ph4, %latchExit.epilog-lcssa.loopexit3 ]
+; EPILOG-NEXT:    br label %latchExit
+; EPILOG:       latchExit:
+; EPILOG-NEXT:    %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
+; EPILOG-NEXT:    ret i32 %result
+; EPILOG:       for.exit2.loopexit:
+; EPILOG-NEXT:    br label %for.exit2
+; EPILOG:       for.exit2.loopexit2:
+; EPILOG-NEXT:    br label %for.exit2
+; EPILOG:       for.exit2:
+; EPILOG-NEXT:    ret i32 42
 ;
 ; EPILOG-BLOCK-LABEL: @otherblock_latch_same_exit3(
 ; EPILOG-BLOCK-NEXT:  entry:
@@ -2745,6 +2745,17 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 ; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
+; EPILOG-BLOCK:       for.exiting_block.1:
+; EPILOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
+; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
+; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %3, %add
+; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
+; EPILOG-BLOCK-NEXT:    br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1
+; EPILOG-BLOCK:       latch.1:
+; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !7
 ; EPILOG-BLOCK:       latchExit.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ %add.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.1, %latch.1 ]
@@ -2762,8 +2773,8 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2, label %for.exiting_block.epil
 ; EPILOG-BLOCK:       for.exiting_block.epil:
 ; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
-; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %3, %sum.02.unr
+; EPILOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.epil, align 4
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %4, %sum.02.unr
 ; EPILOG-BLOCK-NEXT:    %cmp.epil = icmp eq i64 %n, 42
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa, label %latch.epil
 ; EPILOG-BLOCK:       latch.epil:
@@ -2781,17 +2792,6 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br label %for.exit2
 ; EPILOG-BLOCK:       for.exit2:
 ; EPILOG-BLOCK-NEXT:    ret i32 42
-; EPILOG-BLOCK:       for.exiting_block.1:
-; EPILOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-; EPILOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
-; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %4, %add
-; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
-; EPILOG-BLOCK-NEXT:    br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1
-; EPILOG-BLOCK:       latch.1:
-; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !7
 ;
 ; PROLOG-LABEL: @otherblock_latch_same_exit3(
 ; PROLOG-NEXT:  entry:
@@ -2844,24 +2844,6 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG:       latch:
 ; PROLOG-NEXT:    %indvars.iv.next = add i64 %indvars.iv, 1
 ; PROLOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
-; PROLOG:       latchExit.unr-lcssa.loopexit:
-; PROLOG-NEXT:    %result.ph.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %for.exiting_block.2 ], [ %add.2, %for.exiting_block.3 ], [ %add.3, %for.exiting_block.4 ], [ %add.4, %for.exiting_block.5 ], [ %add.5, %for.exiting_block.6 ], [ %add.6, %for.exiting_block.7 ], [ %add.7, %latch.7 ]
-; PROLOG-NEXT:    br label %latchExit.unr-lcssa
-; PROLOG:       latchExit.unr-lcssa.loopexit2:
-; PROLOG-NEXT:    %result.ph.ph3 = phi i32 [ %sum.02.prol, %for.exiting_block.prol ]
-; PROLOG-NEXT:    br label %latchExit.unr-lcssa
-; PROLOG:       latchExit.unr-lcssa:
-; PROLOG-NEXT:    %result.ph = phi i32 [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ], [ %result.ph.ph3, %latchExit.unr-lcssa.loopexit2 ]
-; PROLOG-NEXT:    br label %latchExit
-; PROLOG:       latchExit:
-; PROLOG-NEXT:    %result = phi i32 [ %result.unr, %header.prol.loopexit ], [ %result.ph, %latchExit.unr-lcssa ]
-; PROLOG-NEXT:    ret i32 %result
-; PROLOG:       for.exit2.loopexit:
-; PROLOG-NEXT:    br label %for.exit2
-; PROLOG:       for.exit2.loopexit1:
-; PROLOG-NEXT:    br label %for.exit2
-; PROLOG:       for.exit2:
-; PROLOG-NEXT:    ret i32 42
 ; PROLOG:       for.exiting_block.1:
 ; PROLOG-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
 ; PROLOG-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
@@ -2926,6 +2908,24 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv.next.6, 1
 ; PROLOG-NEXT:    %exitcond.7 = icmp eq i64 %indvars.iv.next.7, %n
 ; PROLOG-NEXT:    br i1 %exitcond.7, label %latchExit.unr-lcssa.loopexit, label %header
+; PROLOG:       latchExit.unr-lcssa.loopexit:
+; PROLOG-NEXT:    %result.ph.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %for.exiting_block.2 ], [ %add.2, %for.exiting_block.3 ], [ %add.3, %for.exiting_block.4 ], [ %add.4, %for.exiting_block.5 ], [ %add.5, %for.exiting_block.6 ], [ %add.6, %for.exiting_block.7 ], [ %add.7, %latch.7 ]
+; PROLOG-NEXT:    br label %latchExit.unr-lcssa
+; PROLOG:       latchExit.unr-lcssa.loopexit2:
+; PROLOG-NEXT:    %result.ph.ph3 = phi i32 [ %sum.02.prol, %for.exiting_block.prol ]
+; PROLOG-NEXT:    br label %latchExit.unr-lcssa
+; PROLOG:       latchExit.unr-lcssa:
+; PROLOG-NEXT:    %result.ph = phi i32 [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ], [ %result.ph.ph3, %latchExit.unr-lcssa.loopexit2 ]
+; PROLOG-NEXT:    br label %latchExit
+; PROLOG:       latchExit:
+; PROLOG-NEXT:    %result = phi i32 [ %result.unr, %header.prol.loopexit ], [ %result.ph, %latchExit.unr-lcssa ]
+; PROLOG-NEXT:    ret i32 %result
+; PROLOG:       for.exit2.loopexit:
+; PROLOG-NEXT:    br label %for.exit2
+; PROLOG:       for.exit2.loopexit1:
+; PROLOG-NEXT:    br label %for.exit2
+; PROLOG:       for.exit2:
+; PROLOG-NEXT:    ret i32 42
 ;
 ; PROLOG-BLOCK-LABEL: @otherblock_latch_same_exit3(
 ; PROLOG-BLOCK-NEXT:  entry:
@@ -2964,6 +2964,16 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-BLOCK:       latch:
 ; PROLOG-BLOCK-NEXT:    %indvars.iv.next = add i64 %indvars.iv, 1
 ; PROLOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
+; PROLOG-BLOCK:       for.exiting_block.1:
+; PROLOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
+; PROLOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
+; PROLOG-BLOCK-NEXT:    %add.1 = add nsw i32 %4, %add
+; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
+; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1
+; PROLOG-BLOCK:       latch.1:
+; PROLOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
+; PROLOG-BLOCK-NEXT:    %exitcond.1 = icmp eq i64 %indvars.iv.next.1, %n
+; PROLOG-BLOCK-NEXT:    br i1 %exitcond.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !7
 ; PROLOG-BLOCK:       latchExit.unr-lcssa.loopexit:
 ; PROLOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %latch.1 ]
 ; PROLOG-BLOCK-NEXT:    br label %latchExit.unr-lcssa
@@ -2977,16 +2987,6 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    br label %for.exit2
 ; PROLOG-BLOCK:       for.exit2:
 ; PROLOG-BLOCK-NEXT:    ret i32 42
-; PROLOG-BLOCK:       for.exiting_block.1:
-; PROLOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-; PROLOG-BLOCK-NEXT:    %4 = load i32, i32* %arrayidx.1, align 4
-; PROLOG-BLOCK-NEXT:    %add.1 = add nsw i32 %4, %add
-; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
-; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %latchExit.unr-lcssa.loopexit, label %latch.1
-; PROLOG-BLOCK:       latch.1:
-; PROLOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; PROLOG-BLOCK-NEXT:    %exitcond.1 = icmp eq i64 %indvars.iv.next.1, %n
-; PROLOG-BLOCK-NEXT:    br i1 %exitcond.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !7
 ;
 
 entry:
@@ -3091,52 +3091,6 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG-NEXT:    %sum.next = add i64 %sum, %add
 ; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.1, label %loop_exiting.1
-; EPILOG:       exit1.loopexit:
-; EPILOG-NEXT:    %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ], [ %ivy.2, %loop_exiting.2 ], [ %ivy.3, %loop_exiting.3 ], [ %ivy.3, %loop_exiting.3 ], [ %ivy.4, %loop_exiting.4 ], [ %ivy.4, %loop_exiting.4 ], [ %ivy.5, %loop_exiting.5 ], [ %ivy.5, %loop_exiting.5 ], [ %ivy.6, %loop_exiting.6 ], [ %ivy.6, %loop_exiting.6 ], [ %ivy.7, %loop_exiting.7 ], [ %ivy.7, %loop_exiting.7 ]
-; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1.loopexit2:
-; EPILOG-NEXT:    %result.ph3 = phi i64 [ %ivy.epil, %loop_exiting.epil ], [ %ivy.epil, %loop_exiting.epil ]
-; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1:
-; EPILOG-NEXT:    %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %result.ph3, %exit1.loopexit2 ]
-; EPILOG-NEXT:    ret i64 %result
-; EPILOG:       latchexit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %sum.next.lcssa.ph.ph = phi i64 [ %sum.next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    %sum.unr.ph = phi i64 [ %sum.next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    br label %latchexit.unr-lcssa
-; EPILOG:       latchexit.unr-lcssa:
-; EPILOG-NEXT:    %sum.next.lcssa.ph = phi i64 [ undef, %entry ], [ %sum.next.lcssa.ph.ph, %latchexit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %latchexit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %sum.unr = phi i64 [ 0, %entry ], [ %sum.unr.ph, %latchexit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
-; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %latchexit
-; EPILOG:       loop_header.epil.preheader:
-; EPILOG-NEXT:    br label %loop_header.epil
-; EPILOG:       loop_header.epil:
-; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %sum.epil = phi i64 [ %sum.unr, %loop_header.epil.preheader ], [ %sum.next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
-; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %loop_exiting.epil
-; EPILOG:       loop_exiting.epil:
-; EPILOG-NEXT:    %ivy.epil = add i64 %iv.epil, %add
-; EPILOG-NEXT:    switch i64 %sum.epil, label %loop_latch.epil [
-; EPILOG-NEXT:    i64 24, label %exit1.loopexit2
-; EPILOG-NEXT:    i64 42, label %exit1.loopexit2
-; EPILOG-NEXT:    ]
-; EPILOG:       loop_latch.epil:
-; EPILOG-NEXT:    %iv_next.epil = add nuw nsw i64 %iv.epil, 1
-; EPILOG-NEXT:    %sum.next.epil = add i64 %sum.epil, %add
-; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %latchexit.epilog-lcssa, !llvm.loop !8
-; EPILOG:       latchexit.epilog-lcssa:
-; EPILOG-NEXT:    %sum.next.lcssa.ph1 = phi i64 [ %sum.next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    br label %latchexit
-; EPILOG:       latchexit:
-; EPILOG-NEXT:    %sum.next.lcssa = phi i64 [ %sum.next.lcssa.ph, %latchexit.unr-lcssa ], [ %sum.next.lcssa.ph1, %latchexit.epilog-lcssa ]
-; EPILOG-NEXT:    ret i64 %sum.next.lcssa
 ; EPILOG:       loop_exiting.1:
 ; EPILOG-NEXT:    %ivy.1 = add i64 %iv_next, %add
 ; EPILOG-NEXT:    switch i64 %sum.next, label %loop_latch.1 [
@@ -3215,6 +3169,52 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %latchexit.unr-lcssa.loopexit
+; EPILOG:       exit1.loopexit:
+; EPILOG-NEXT:    %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ], [ %ivy.2, %loop_exiting.2 ], [ %ivy.3, %loop_exiting.3 ], [ %ivy.3, %loop_exiting.3 ], [ %ivy.4, %loop_exiting.4 ], [ %ivy.4, %loop_exiting.4 ], [ %ivy.5, %loop_exiting.5 ], [ %ivy.5, %loop_exiting.5 ], [ %ivy.6, %loop_exiting.6 ], [ %ivy.6, %loop_exiting.6 ], [ %ivy.7, %loop_exiting.7 ], [ %ivy.7, %loop_exiting.7 ]
+; EPILOG-NEXT:    br label %exit1
+; EPILOG:       exit1.loopexit2:
+; EPILOG-NEXT:    %result.ph3 = phi i64 [ %ivy.epil, %loop_exiting.epil ], [ %ivy.epil, %loop_exiting.epil ]
+; EPILOG-NEXT:    br label %exit1
+; EPILOG:       exit1:
+; EPILOG-NEXT:    %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %result.ph3, %exit1.loopexit2 ]
+; EPILOG-NEXT:    ret i64 %result
+; EPILOG:       latchexit.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %sum.next.lcssa.ph.ph = phi i64 [ %sum.next.7, %loop_latch.7 ]
+; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
+; EPILOG-NEXT:    %sum.unr.ph = phi i64 [ %sum.next.7, %loop_latch.7 ]
+; EPILOG-NEXT:    br label %latchexit.unr-lcssa
+; EPILOG:       latchexit.unr-lcssa:
+; EPILOG-NEXT:    %sum.next.lcssa.ph = phi i64 [ undef, %entry ], [ %sum.next.lcssa.ph.ph, %latchexit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %latchexit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %sum.unr = phi i64 [ 0, %entry ], [ %sum.unr.ph, %latchexit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %latchexit
+; EPILOG:       loop_header.epil.preheader:
+; EPILOG-NEXT:    br label %loop_header.epil
+; EPILOG:       loop_header.epil:
+; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %sum.epil = phi i64 [ %sum.unr, %loop_header.epil.preheader ], [ %sum.next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %loop_exiting.epil
+; EPILOG:       loop_exiting.epil:
+; EPILOG-NEXT:    %ivy.epil = add i64 %iv.epil, %add
+; EPILOG-NEXT:    switch i64 %sum.epil, label %loop_latch.epil [
+; EPILOG-NEXT:    i64 24, label %exit1.loopexit2
+; EPILOG-NEXT:    i64 42, label %exit1.loopexit2
+; EPILOG-NEXT:    ]
+; EPILOG:       loop_latch.epil:
+; EPILOG-NEXT:    %iv_next.epil = add nuw nsw i64 %iv.epil, 1
+; EPILOG-NEXT:    %sum.next.epil = add i64 %sum.epil, %add
+; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
+; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %latchexit.epilog-lcssa, !llvm.loop !8
+; EPILOG:       latchexit.epilog-lcssa:
+; EPILOG-NEXT:    %sum.next.lcssa.ph1 = phi i64 [ %sum.next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    br label %latchexit
+; EPILOG:       latchexit:
+; EPILOG-NEXT:    %sum.next.lcssa = phi i64 [ %sum.next.lcssa.ph, %latchexit.unr-lcssa ], [ %sum.next.lcssa.ph1, %latchexit.epilog-lcssa ]
+; EPILOG-NEXT:    ret i64 %sum.next.lcssa
 ;
 ; EPILOG-BLOCK-LABEL: @test5(
 ; EPILOG-BLOCK-NEXT:  entry:
@@ -3241,6 +3241,18 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %sum.next = add i64 %sum, %add
 ; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.1, label %loop_exiting.1
+; EPILOG-BLOCK:       loop_exiting.1:
+; EPILOG-BLOCK-NEXT:    %ivy.1 = add i64 %iv_next, %add
+; EPILOG-BLOCK-NEXT:    switch i64 %sum.next, label %loop_latch.1 [
+; EPILOG-BLOCK-NEXT:    i64 24, label %exit1.loopexit
+; EPILOG-BLOCK-NEXT:    i64 42, label %exit1.loopexit
+; EPILOG-BLOCK-NEXT:    ]
+; EPILOG-BLOCK:       loop_latch.1:
+; EPILOG-BLOCK-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
+; EPILOG-BLOCK-NEXT:    %sum.next.1 = add i64 %sum.next, %add
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %latchexit.unr-lcssa.loopexit, !llvm.loop !8
 ; EPILOG-BLOCK:       exit1.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %exit1
@@ -3274,18 +3286,6 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG-BLOCK:       latchexit:
 ; EPILOG-BLOCK-NEXT:    %sum.next.lcssa = phi i64 [ %sum.next.lcssa.ph, %latchexit.unr-lcssa ], [ %sum.next.epil, %loop_latch.epil ]
 ; EPILOG-BLOCK-NEXT:    ret i64 %sum.next.lcssa
-; EPILOG-BLOCK:       loop_exiting.1:
-; EPILOG-BLOCK-NEXT:    %ivy.1 = add i64 %iv_next, %add
-; EPILOG-BLOCK-NEXT:    switch i64 %sum.next, label %loop_latch.1 [
-; EPILOG-BLOCK-NEXT:    i64 24, label %exit1.loopexit
-; EPILOG-BLOCK-NEXT:    i64 42, label %exit1.loopexit
-; EPILOG-BLOCK-NEXT:    ]
-; EPILOG-BLOCK:       loop_latch.1:
-; EPILOG-BLOCK-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
-; EPILOG-BLOCK-NEXT:    %sum.next.1 = add i64 %sum.next, %add
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %latchexit.unr-lcssa.loopexit, !llvm.loop !8
 ;
 ; PROLOG-LABEL: @test5(
 ; PROLOG-NEXT:  entry:
@@ -3340,21 +3340,6 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; PROLOG-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
 ; PROLOG-NEXT:    %sum.next = add i64 %sum, %add
 ; PROLOG-NEXT:    br i1 %cond, label %loop_latch.1, label %loop_exiting.1
-; PROLOG:       exit1.loopexit:
-; PROLOG-NEXT:    %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ], [ %ivy.2, %loop_exiting.2 ], [ %ivy.3, %loop_exiting.3 ], [ %ivy.3, %loop_exiting.3 ], [ %ivy.4, %loop_exiting.4 ], [ %ivy.4, %loop_exiting.4 ], [ %ivy.5, %loop_exiting.5 ], [ %ivy.5, %loop_exiting.5 ], [ %ivy.6, %loop_exiting.6 ], [ %ivy.6, %loop_exiting.6 ], [ %ivy.7, %loop_exiting.7 ], [ %ivy.7, %loop_exiting.7 ]
-; PROLOG-NEXT:    br label %exit1
-; PROLOG:       exit1.loopexit1:
-; PROLOG-NEXT:    %result.ph2 = phi i64 [ %ivy.prol, %loop_exiting.prol ], [ %ivy.prol, %loop_exiting.prol ]
-; PROLOG-NEXT:    br label %exit1
-; PROLOG:       exit1:
-; PROLOG-NEXT:    %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %result.ph2, %exit1.loopexit1 ]
-; PROLOG-NEXT:    ret i64 %result
-; PROLOG:       latchexit.unr-lcssa:
-; PROLOG-NEXT:    %sum.next.lcssa.ph = phi i64 [ %sum.next.7, %loop_latch.7 ]
-; PROLOG-NEXT:    br label %latchexit
-; PROLOG:       latchexit:
-; PROLOG-NEXT:    %sum.next.lcssa = phi i64 [ %sum.next.lcssa.unr, %loop_header.prol.loopexit ], [ %sum.next.lcssa.ph, %latchexit.unr-lcssa ]
-; PROLOG-NEXT:    ret i64 %sum.next.lcssa
 ; PROLOG:       loop_exiting.1:
 ; PROLOG-NEXT:    %ivy.1 = add i64 %iv_next, %add
 ; PROLOG-NEXT:    switch i64 %sum.next, label %loop_latch.1 [
@@ -3426,6 +3411,21 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; PROLOG-NEXT:    %sum.next.7 = add i64 %sum.next.6, %add
 ; PROLOG-NEXT:    %cmp.7 = icmp ne i64 %iv_next.7, %trip
 ; PROLOG-NEXT:    br i1 %cmp.7, label %loop_header, label %latchexit.unr-lcssa
+; PROLOG:       exit1.loopexit:
+; PROLOG-NEXT:    %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ], [ %ivy.2, %loop_exiting.2 ], [ %ivy.3, %loop_exiting.3 ], [ %ivy.3, %loop_exiting.3 ], [ %ivy.4, %loop_exiting.4 ], [ %ivy.4, %loop_exiting.4 ], [ %ivy.5, %loop_exiting.5 ], [ %ivy.5, %loop_exiting.5 ], [ %ivy.6, %loop_exiting.6 ], [ %ivy.6, %loop_exiting.6 ], [ %ivy.7, %loop_exiting.7 ], [ %ivy.7, %loop_exiting.7 ]
+; PROLOG-NEXT:    br label %exit1
+; PROLOG:       exit1.loopexit1:
+; PROLOG-NEXT:    %result.ph2 = phi i64 [ %ivy.prol, %loop_exiting.prol ], [ %ivy.prol, %loop_exiting.prol ]
+; PROLOG-NEXT:    br label %exit1
+; PROLOG:       exit1:
+; PROLOG-NEXT:    %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %result.ph2, %exit1.loopexit1 ]
+; PROLOG-NEXT:    ret i64 %result
+; PROLOG:       latchexit.unr-lcssa:
+; PROLOG-NEXT:    %sum.next.lcssa.ph = phi i64 [ %sum.next.7, %loop_latch.7 ]
+; PROLOG-NEXT:    br label %latchexit
+; PROLOG:       latchexit:
+; PROLOG-NEXT:    %sum.next.lcssa = phi i64 [ %sum.next.lcssa.unr, %loop_header.prol.loopexit ], [ %sum.next.lcssa.ph, %latchexit.unr-lcssa ]
+; PROLOG-NEXT:    ret i64 %sum.next.lcssa
 ;
 ; PROLOG-BLOCK-LABEL: @test5(
 ; PROLOG-BLOCK-NEXT:  entry:
@@ -3466,6 +3466,17 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
 ; PROLOG-BLOCK-NEXT:    %sum.next = add i64 %sum, %add
 ; PROLOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.1, label %loop_exiting.1
+; PROLOG-BLOCK:       loop_exiting.1:
+; PROLOG-BLOCK-NEXT:    %ivy.1 = add i64 %iv_next, %add
+; PROLOG-BLOCK-NEXT:    switch i64 %sum.next, label %loop_latch.1 [
+; PROLOG-BLOCK-NEXT:    i64 24, label %exit1.loopexit
+; PROLOG-BLOCK-NEXT:    i64 42, label %exit1.loopexit
+; PROLOG-BLOCK-NEXT:    ]
+; PROLOG-BLOCK:       loop_latch.1:
+; PROLOG-BLOCK-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
+; PROLOG-BLOCK-NEXT:    %sum.next.1 = add i64 %sum.next, %add
+; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp ne i64 %iv_next.1, %trip
+; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %loop_header, label %latchexit.unr-lcssa, !llvm.loop !8
 ; PROLOG-BLOCK:       exit1.loopexit:
 ; PROLOG-BLOCK-NEXT:    %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ]
 ; PROLOG-BLOCK-NEXT:    br label %exit1
@@ -3478,17 +3489,6 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; PROLOG-BLOCK:       latchexit:
 ; PROLOG-BLOCK-NEXT:    %sum.next.lcssa = phi i64 [ %sum.next.lcssa.unr, %loop_header.prol.loopexit ], [ %sum.next.lcssa.ph, %latchexit.unr-lcssa ]
 ; PROLOG-BLOCK-NEXT:    ret i64 %sum.next.lcssa
-; PROLOG-BLOCK:       loop_exiting.1:
-; PROLOG-BLOCK-NEXT:    %ivy.1 = add i64 %iv_next, %add
-; PROLOG-BLOCK-NEXT:    switch i64 %sum.next, label %loop_latch.1 [
-; PROLOG-BLOCK-NEXT:    i64 24, label %exit1.loopexit
-; PROLOG-BLOCK-NEXT:    i64 42, label %exit1.loopexit
-; PROLOG-BLOCK-NEXT:    ]
-; PROLOG-BLOCK:       loop_latch.1:
-; PROLOG-BLOCK-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
-; PROLOG-BLOCK-NEXT:    %sum.next.1 = add i64 %sum.next, %add
-; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp ne i64 %iv_next.1, %trip
-; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %loop_header, label %latchexit.unr-lcssa, !llvm.loop !8
 ;
 
 entry:
@@ -3546,56 +3546,6 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 ; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1
-; EPILOG:       latch_exit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %sum.0.lcssa.ph.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %latch.7 ]
-; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    br label %latch_exit.unr-lcssa
-; EPILOG:       latch_exit.unr-lcssa:
-; EPILOG-NEXT:    %sum.0.lcssa.ph = phi i32 [ undef, %entry ], [ %sum.0.lcssa.ph.ph, %latch_exit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latch_exit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latch_exit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
-; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latch_exit
-; EPILOG:       header.epil.preheader:
-; EPILOG-NEXT:    br label %header.epil
-; EPILOG:       header.epil:
-; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
-; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit2, label %for.exiting_block.epil
-; EPILOG:       for.exiting_block.epil:
-; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
-; EPILOG-NEXT:    br i1 %cmp.epil, label %for.exit2.loopexit2, label %latch.epil
-; EPILOG:       latch.epil:
-; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
-; EPILOG-NEXT:    %load.epil = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-NEXT:    %add.epil = add nsw i32 %load.epil, %sum.02.epil
-; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latch_exit.epilog-lcssa, !llvm.loop !9
-; EPILOG:       latch_exit.epilog-lcssa:
-; EPILOG-NEXT:    %sum.0.lcssa.ph1 = phi i32 [ %add.epil, %latch.epil ]
-; EPILOG-NEXT:    br label %latch_exit
-; EPILOG:       latch_exit:
-; EPILOG-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ], [ %sum.0.lcssa.ph1, %latch_exit.epilog-lcssa ]
-; EPILOG-NEXT:    ret i32 %sum.0.lcssa
-; EPILOG:       for.exit2.loopexit:
-; EPILOG-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %latch.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %latch.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %latch.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %latch.6 ], [ 42, %for.exiting_block.7 ]
-; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2.loopexit2:
-; EPILOG-NEXT:    %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
-; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2:
-; EPILOG-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
-; EPILOG-NEXT:    %addx = add i32 %retval, %x
-; EPILOG-NEXT:    br i1 %cond, label %exit_true, label %exit_false
-; EPILOG:       exit_true:
-; EPILOG-NEXT:    ret i32 %retval
-; EPILOG:       exit_false:
-; EPILOG-NEXT:    ret i32 %addx
 ; EPILOG:       for.exiting_block.1:
 ; EPILOG-NEXT:    %cmp.1 = icmp eq i64 %n, 42
 ; EPILOG-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1
@@ -3667,6 +3617,56 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latch_exit.unr-lcssa.loopexit, label %header
+; EPILOG:       latch_exit.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %sum.0.lcssa.ph.ph = phi i32 [ %add.7, %latch.7 ]
+; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %latch.7 ]
+; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %latch.7 ]
+; EPILOG-NEXT:    br label %latch_exit.unr-lcssa
+; EPILOG:       latch_exit.unr-lcssa:
+; EPILOG-NEXT:    %sum.0.lcssa.ph = phi i32 [ undef, %entry ], [ %sum.0.lcssa.ph.ph, %latch_exit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latch_exit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latch_exit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latch_exit
+; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    br label %header.epil
+; EPILOG:       header.epil:
+; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit2, label %for.exiting_block.epil
+; EPILOG:       for.exiting_block.epil:
+; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
+; EPILOG-NEXT:    br i1 %cmp.epil, label %for.exit2.loopexit2, label %latch.epil
+; EPILOG:       latch.epil:
+; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-NEXT:    %load.epil = load i32, i32* %arrayidx.epil, align 4
+; EPILOG-NEXT:    %add.epil = add nsw i32 %load.epil, %sum.02.epil
+; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
+; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
+; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latch_exit.epilog-lcssa, !llvm.loop !9
+; EPILOG:       latch_exit.epilog-lcssa:
+; EPILOG-NEXT:    %sum.0.lcssa.ph1 = phi i32 [ %add.epil, %latch.epil ]
+; EPILOG-NEXT:    br label %latch_exit
+; EPILOG:       latch_exit:
+; EPILOG-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ], [ %sum.0.lcssa.ph1, %latch_exit.epilog-lcssa ]
+; EPILOG-NEXT:    ret i32 %sum.0.lcssa
+; EPILOG:       for.exit2.loopexit:
+; EPILOG-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %latch.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %latch.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %latch.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %latch.6 ], [ 42, %for.exiting_block.7 ]
+; EPILOG-NEXT:    br label %for.exit2
+; EPILOG:       for.exit2.loopexit2:
+; EPILOG-NEXT:    %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
+; EPILOG-NEXT:    br label %for.exit2
+; EPILOG:       for.exit2:
+; EPILOG-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
+; EPILOG-NEXT:    %addx = add i32 %retval, %x
+; EPILOG-NEXT:    br i1 %cond, label %exit_true, label %exit_false
+; EPILOG:       exit_true:
+; EPILOG-NEXT:    ret i32 %retval
+; EPILOG:       exit_false:
+; EPILOG-NEXT:    ret i32 %addx
 ;
 ; EPILOG-BLOCK-LABEL: @test6(
 ; EPILOG-BLOCK-NEXT:  entry:
@@ -3692,6 +3692,17 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 ; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1
+; EPILOG-BLOCK:       for.exiting_block.1:
+; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
+; EPILOG-BLOCK-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1
+; EPILOG-BLOCK:       latch.1:
+; EPILOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
+; EPILOG-BLOCK-NEXT:    %load.1 = load i32, i32* %arrayidx.1, align 4
+; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %load.1, %add
+; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latch_exit.unr-lcssa.loopexit, label %header, !llvm.loop !9
 ; EPILOG-BLOCK:       latch_exit.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %sum.0.lcssa.ph.ph = phi i32 [ %add.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.1, %latch.1 ]
@@ -3729,17 +3740,6 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-BLOCK-NEXT:    ret i32 %retval
 ; EPILOG-BLOCK:       exit_false:
 ; EPILOG-BLOCK-NEXT:    ret i32 %addx
-; EPILOG-BLOCK:       for.exiting_block.1:
-; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
-; EPILOG-BLOCK-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1
-; EPILOG-BLOCK:       latch.1:
-; EPILOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-; EPILOG-BLOCK-NEXT:    %load.1 = load i32, i32* %arrayidx.1, align 4
-; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %load.1, %add
-; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latch_exit.unr-lcssa.loopexit, label %header, !llvm.loop !9
 ;
 ; PROLOG-LABEL: @test6(
 ; PROLOG-NEXT:  entry:
@@ -3792,26 +3792,6 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; PROLOG-NEXT:    %add = add nsw i32 %load, %sum.02
 ; PROLOG-NEXT:    %indvars.iv.next = add i64 %indvars.iv, 1
 ; PROLOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1
-; PROLOG:       latch_exit.unr-lcssa:
-; PROLOG-NEXT:    %sum.0.lcssa.ph = phi i32 [ %add.7, %latch.7 ]
-; PROLOG-NEXT:    br label %latch_exit
-; PROLOG:       latch_exit:
-; PROLOG-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.unr, %header.prol.loopexit ], [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ]
-; PROLOG-NEXT:    ret i32 %sum.0.lcssa
-; PROLOG:       for.exit2.loopexit:
-; PROLOG-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %latch.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %latch.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %latch.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %latch.6 ], [ 42, %for.exiting_block.7 ]
-; PROLOG-NEXT:    br label %for.exit2
-; PROLOG:       for.exit2.loopexit1:
-; PROLOG-NEXT:    %retval.ph2 = phi i32 [ 42, %for.exiting_block.prol ], [ %sum.02.prol, %header.prol ]
-; PROLOG-NEXT:    br label %for.exit2
-; PROLOG:       for.exit2:
-; PROLOG-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph2, %for.exit2.loopexit1 ]
-; PROLOG-NEXT:    %addx = add i32 %retval, %x
-; PROLOG-NEXT:    br i1 %cond, label %exit_true, label %exit_false
-; PROLOG:       exit_true:
-; PROLOG-NEXT:    ret i32 %retval
-; PROLOG:       exit_false:
-; PROLOG-NEXT:    ret i32 %addx
 ; PROLOG:       for.exiting_block.1:
 ; PROLOG-NEXT:    %cmp.1 = icmp eq i64 %n, 42
 ; PROLOG-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1
@@ -3876,6 +3856,26 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; PROLOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv.next.6, 1
 ; PROLOG-NEXT:    %exitcond.7 = icmp eq i64 %indvars.iv.next.7, %n
 ; PROLOG-NEXT:    br i1 %exitcond.7, label %latch_exit.unr-lcssa, label %header
+; PROLOG:       latch_exit.unr-lcssa:
+; PROLOG-NEXT:    %sum.0.lcssa.ph = phi i32 [ %add.7, %latch.7 ]
+; PROLOG-NEXT:    br label %latch_exit
+; PROLOG:       latch_exit:
+; PROLOG-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.unr, %header.prol.loopexit ], [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ]
+; PROLOG-NEXT:    ret i32 %sum.0.lcssa
+; PROLOG:       for.exit2.loopexit:
+; PROLOG-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %latch.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %latch.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %latch.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %latch.6 ], [ 42, %for.exiting_block.7 ]
+; PROLOG-NEXT:    br label %for.exit2
+; PROLOG:       for.exit2.loopexit1:
+; PROLOG-NEXT:    %retval.ph2 = phi i32 [ 42, %for.exiting_block.prol ], [ %sum.02.prol, %header.prol ]
+; PROLOG-NEXT:    br label %for.exit2
+; PROLOG:       for.exit2:
+; PROLOG-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph2, %for.exit2.loopexit1 ]
+; PROLOG-NEXT:    %addx = add i32 %retval, %x
+; PROLOG-NEXT:    br i1 %cond, label %exit_true, label %exit_false
+; PROLOG:       exit_true:
+; PROLOG-NEXT:    ret i32 %retval
+; PROLOG:       exit_false:
+; PROLOG-NEXT:    ret i32 %addx
 ;
 ; PROLOG-BLOCK-LABEL: @test6(
 ; PROLOG-BLOCK-NEXT:  entry:
@@ -3914,6 +3914,16 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; PROLOG-BLOCK-NEXT:    %add = add nsw i32 %load, %sum.02
 ; PROLOG-BLOCK-NEXT:    %indvars.iv.next = add i64 %indvars.iv, 1
 ; PROLOG-BLOCK-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1
+; PROLOG-BLOCK:       for.exiting_block.1:
+; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
+; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1
+; PROLOG-BLOCK:       latch.1:
+; PROLOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
+; PROLOG-BLOCK-NEXT:    %load.1 = load i32, i32* %arrayidx.1, align 4
+; PROLOG-BLOCK-NEXT:    %add.1 = add nsw i32 %load.1, %add
+; PROLOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
+; PROLOG-BLOCK-NEXT:    %exitcond.1 = icmp eq i64 %indvars.iv.next.1, %n
+; PROLOG-BLOCK-NEXT:    br i1 %exitcond.1, label %latch_exit.unr-lcssa, label %header, !llvm.loop !9
 ; PROLOG-BLOCK:       latch_exit.unr-lcssa:
 ; PROLOG-BLOCK-NEXT:    %sum.0.lcssa.ph = phi i32 [ %add.1, %latch.1 ]
 ; PROLOG-BLOCK-NEXT:    br label %latch_exit
@@ -3931,16 +3941,6 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; PROLOG-BLOCK-NEXT:    ret i32 %retval
 ; PROLOG-BLOCK:       exit_false:
 ; PROLOG-BLOCK-NEXT:    ret i32 %addx
-; PROLOG-BLOCK:       for.exiting_block.1:
-; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
-; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %for.exit2.loopexit, label %latch.1
-; PROLOG-BLOCK:       latch.1:
-; PROLOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-; PROLOG-BLOCK-NEXT:    %load.1 = load i32, i32* %arrayidx.1, align 4
-; PROLOG-BLOCK-NEXT:    %add.1 = add nsw i32 %load.1, %add
-; PROLOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; PROLOG-BLOCK-NEXT:    %exitcond.1 = icmp eq i64 %indvars.iv.next.1, %n
-; PROLOG-BLOCK-NEXT:    br i1 %exitcond.1, label %latch_exit.unr-lcssa, label %header, !llvm.loop !9
 ;
 
 entry:
@@ -4004,6 +4004,35 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; EPILOG-NEXT:    %add = add nuw nsw i64 %i6, 1
 ; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.1
+; EPILOG:       latch.1:
+; EPILOG-NEXT:    %add.1 = add nuw nsw i64 %add, 1
+; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.2
+; EPILOG:       latch.2:
+; EPILOG-NEXT:    %add.2 = add nuw nsw i64 %add.1, 1
+; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.3
+; EPILOG:       latch.3:
+; EPILOG-NEXT:    %add.3 = add nuw nsw i64 %add.2, 1
+; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.4
+; EPILOG:       latch.4:
+; EPILOG-NEXT:    %add.4 = add nuw nsw i64 %add.3, 1
+; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.5
+; EPILOG:       latch.5:
+; EPILOG-NEXT:    %add.5 = add nuw nsw i64 %add.4, 1
+; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.6
+; EPILOG:       latch.6:
+; EPILOG-NEXT:    %add.6 = add nuw nsw i64 %add.5, 1
+; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.7
+; EPILOG:       latch.7:
+; EPILOG-NEXT:    %add.7 = add nuw nsw i64 %add.6, 1
+; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %header, label %latchexit.unr-lcssa.loopexit
 ; EPILOG:       latchexit.unr-lcssa.loopexit:
 ; EPILOG-NEXT:    %i6.unr.ph = phi i64 [ %add.7, %latch.7 ]
 ; EPILOG-NEXT:    br label %latchexit.unr-lcssa
@@ -4038,35 +4067,6 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; EPILOG:       loopexit1:
 ; EPILOG-NEXT:    %sext3 = phi i32 [ %sext3.ph, %loopexit1.loopexit ], [ %sext3.ph2, %loopexit1.loopexit1 ]
 ; EPILOG-NEXT:    ret i32 %sext3
-; EPILOG:       latch.1:
-; EPILOG-NEXT:    %add.1 = add nuw nsw i64 %add, 1
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.2
-; EPILOG:       latch.2:
-; EPILOG-NEXT:    %add.2 = add nuw nsw i64 %add.1, 1
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
-; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.3
-; EPILOG:       latch.3:
-; EPILOG-NEXT:    %add.3 = add nuw nsw i64 %add.2, 1
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
-; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.4
-; EPILOG:       latch.4:
-; EPILOG-NEXT:    %add.4 = add nuw nsw i64 %add.3, 1
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
-; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.5
-; EPILOG:       latch.5:
-; EPILOG-NEXT:    %add.5 = add nuw nsw i64 %add.4, 1
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
-; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.6
-; EPILOG:       latch.6:
-; EPILOG-NEXT:    %add.6 = add nuw nsw i64 %add.5, 1
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
-; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.7
-; EPILOG:       latch.7:
-; EPILOG-NEXT:    %add.7 = add nuw nsw i64 %add.6, 1
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %header, label %latchexit.unr-lcssa.loopexit
 ;
 ; EPILOG-BLOCK-LABEL: @test7(
 ; EPILOG-BLOCK-NEXT:  bb:
@@ -4091,6 +4091,11 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; EPILOG-BLOCK-NEXT:    %add = add nuw nsw i64 %i6, 1
 ; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.1
+; EPILOG-BLOCK:       latch.1:
+; EPILOG-BLOCK-NEXT:    %add.1 = add nuw nsw i64 %add, 1
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %header, label %latchexit.unr-lcssa.loopexit, !llvm.loop !10
 ; EPILOG-BLOCK:       latchexit.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %latchexit.unr-lcssa
 ; EPILOG-BLOCK:       latchexit.unr-lcssa:
@@ -4112,11 +4117,6 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; EPILOG-BLOCK:       loopexit1:
 ; EPILOG-BLOCK-NEXT:    %sext3 = phi i32 [ %shft, %header.epil ], [ %sext3.ph, %loopexit1.loopexit ]
 ; EPILOG-BLOCK-NEXT:    ret i32 %sext3
-; EPILOG-BLOCK:       latch.1:
-; EPILOG-BLOCK-NEXT:    %add.1 = add nuw nsw i64 %add, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %header, label %latchexit.unr-lcssa.loopexit, !llvm.loop !10
 ;
 ; PROLOG-LABEL: @test7(
 ; PROLOG-NEXT:  bb:
@@ -4157,21 +4157,6 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; PROLOG:       latch:
 ; PROLOG-NEXT:    %add = add nuw nsw i64 %i6, 1
 ; PROLOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.1
-; PROLOG:       latchexit.unr-lcssa:
-; PROLOG-NEXT:    br label %latchexit
-; PROLOG:       latchexit:
-; PROLOG-NEXT:    unreachable
-; PROLOG:       loopexit2:
-; PROLOG-NEXT:    ret i32 %shft
-; PROLOG:       loopexit1.loopexit:
-; PROLOG-NEXT:    %sext3.ph = phi i32 [ %shft, %header ], [ %shft, %latch ], [ %shft, %latch.1 ], [ %shft, %latch.2 ], [ %shft, %latch.3 ], [ %shft, %latch.4 ], [ %shft, %latch.5 ], [ %shft, %latch.6 ]
-; PROLOG-NEXT:    br label %loopexit1
-; PROLOG:       loopexit1.loopexit1:
-; PROLOG-NEXT:    %sext3.ph2 = phi i32 [ %shft, %header.prol ]
-; PROLOG-NEXT:    br label %loopexit1
-; PROLOG:       loopexit1:
-; PROLOG-NEXT:    %sext3 = phi i32 [ %sext3.ph, %loopexit1.loopexit ], [ %sext3.ph2, %loopexit1.loopexit1 ]
-; PROLOG-NEXT:    ret i32 %sext3
 ; PROLOG:       latch.1:
 ; PROLOG-NEXT:    %add.1 = add nuw nsw i64 %add, 1
 ; PROLOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.2
@@ -4194,6 +4179,21 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; PROLOG-NEXT:    %add.7 = add nuw nsw i64 %add.6, 1
 ; PROLOG-NEXT:    %i9.7 = icmp slt i64 %add.7, %sext
 ; PROLOG-NEXT:    br i1 %i9.7, label %header, label %latchexit.unr-lcssa
+; PROLOG:       latchexit.unr-lcssa:
+; PROLOG-NEXT:    br label %latchexit
+; PROLOG:       latchexit:
+; PROLOG-NEXT:    unreachable
+; PROLOG:       loopexit2:
+; PROLOG-NEXT:    ret i32 %shft
+; PROLOG:       loopexit1.loopexit:
+; PROLOG-NEXT:    %sext3.ph = phi i32 [ %shft, %header ], [ %shft, %latch ], [ %shft, %latch.1 ], [ %shft, %latch.2 ], [ %shft, %latch.3 ], [ %shft, %latch.4 ], [ %shft, %latch.5 ], [ %shft, %latch.6 ]
+; PROLOG-NEXT:    br label %loopexit1
+; PROLOG:       loopexit1.loopexit1:
+; PROLOG-NEXT:    %sext3.ph2 = phi i32 [ %shft, %header.prol ]
+; PROLOG-NEXT:    br label %loopexit1
+; PROLOG:       loopexit1:
+; PROLOG-NEXT:    %sext3 = phi i32 [ %sext3.ph, %loopexit1.loopexit ], [ %sext3.ph2, %loopexit1.loopexit1 ]
+; PROLOG-NEXT:    ret i32 %sext3
 ;
 ; PROLOG-BLOCK-LABEL: @test7(
 ; PROLOG-BLOCK-NEXT:  bb:
@@ -4225,6 +4225,10 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; PROLOG-BLOCK:       latch:
 ; PROLOG-BLOCK-NEXT:    %add = add nuw nsw i64 %i6, 1
 ; PROLOG-BLOCK-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.1
+; PROLOG-BLOCK:       latch.1:
+; PROLOG-BLOCK-NEXT:    %add.1 = add nuw nsw i64 %add, 1
+; PROLOG-BLOCK-NEXT:    %i9.1 = icmp slt i64 %add.1, %sext
+; PROLOG-BLOCK-NEXT:    br i1 %i9.1, label %header, label %latchexit.unr-lcssa, !llvm.loop !10
 ; PROLOG-BLOCK:       latchexit.unr-lcssa:
 ; PROLOG-BLOCK-NEXT:    br label %latchexit
 ; PROLOG-BLOCK:       latchexit:
@@ -4237,10 +4241,6 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; PROLOG-BLOCK:       loopexit1:
 ; PROLOG-BLOCK-NEXT:    %sext3 = phi i32 [ %shft, %header.prol ], [ %sext3.ph, %loopexit1.loopexit ]
 ; PROLOG-BLOCK-NEXT:    ret i32 %sext3
-; PROLOG-BLOCK:       latch.1:
-; PROLOG-BLOCK-NEXT:    %add.1 = add nuw nsw i64 %add, 1
-; PROLOG-BLOCK-NEXT:    %i9.1 = icmp slt i64 %add.1, %sext
-; PROLOG-BLOCK-NEXT:    br i1 %i9.1, label %header, label %latchexit.unr-lcssa, !llvm.loop !10
 ;
 bb:
   %i = icmp slt i32 undef, 2
@@ -4303,31 +4303,6 @@ define void @test8() {
 ; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-NEXT:    %i4.1 = add nuw nsw i64 %i4, 1
 ; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.1
-; EPILOG:       exit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %i3.unr.ph = phi i64 [ %i4.7, %latch.7 ]
-; EPILOG-NEXT:    br label %exit.unr-lcssa
-; EPILOG:       exit.unr-lcssa:
-; EPILOG-NEXT:    %i3.unr = phi i64 [ %i, %outerloop ], [ %i3.unr.ph, %exit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
-; EPILOG-NEXT:    br i1 %lcmp.mod, label %innerH.epil.preheader, label %exit.loopexit
-; EPILOG:       innerH.epil.preheader:
-; EPILOG-NEXT:    br label %innerH.epil
-; EPILOG:       innerH.epil:
-; EPILOG-NEXT:    %i3.epil = phi i64 [ %i4.epil, %latch.epil ], [ %i3.unr, %innerH.epil.preheader ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %innerH.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
-; EPILOG-NEXT:    %i4.epil = add nuw nsw i64 %i3.epil, 1
-; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit1, label %latch.epil
-; EPILOG:       latch.epil:
-; EPILOG-NEXT:    %i6.epil = icmp ult i64 %i4.epil, 100
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %innerH.epil, label %exit.epilog-lcssa, !llvm.loop !11
-; EPILOG:       exit.epilog-lcssa:
-; EPILOG-NEXT:    br label %exit
-; EPILOG:       exit.loopexit:
-; EPILOG-NEXT:    br label %exit
-; EPILOG:       exit:
-; EPILOG-NEXT:    ret void
 ; EPILOG:       latch.1:
 ; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
 ; EPILOG-NEXT:    %i4.2 = add nuw nsw i64 %i4.1, 1
@@ -4356,6 +4331,31 @@ define void @test8() {
 ; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %innerH, label %exit.unr-lcssa.loopexit
+; EPILOG:       exit.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %i3.unr.ph = phi i64 [ %i4.7, %latch.7 ]
+; EPILOG-NEXT:    br label %exit.unr-lcssa
+; EPILOG:       exit.unr-lcssa:
+; EPILOG-NEXT:    %i3.unr = phi i64 [ %i, %outerloop ], [ %i3.unr.ph, %exit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %innerH.epil.preheader, label %exit.loopexit
+; EPILOG:       innerH.epil.preheader:
+; EPILOG-NEXT:    br label %innerH.epil
+; EPILOG:       innerH.epil:
+; EPILOG-NEXT:    %i3.epil = phi i64 [ %i4.epil, %latch.epil ], [ %i3.unr, %innerH.epil.preheader ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %innerH.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    %i4.epil = add nuw nsw i64 %i3.epil, 1
+; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit1, label %latch.epil
+; EPILOG:       latch.epil:
+; EPILOG-NEXT:    %i6.epil = icmp ult i64 %i4.epil, 100
+; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %innerH.epil, label %exit.epilog-lcssa, !llvm.loop !11
+; EPILOG:       exit.epilog-lcssa:
+; EPILOG-NEXT:    br label %exit
+; EPILOG:       exit.loopexit:
+; EPILOG-NEXT:    br label %exit
+; EPILOG:       exit:
+; EPILOG-NEXT:    ret void
 ;
 ; EPILOG-BLOCK-LABEL: @test8(
 ; EPILOG-BLOCK-NEXT:  bb:
@@ -4364,6 +4364,33 @@ define void @test8() {
 ; EPILOG-BLOCK-NEXT:    br label %outerloop.loopexit
 ; EPILOG-BLOCK:       outerloop.loopexit:
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %exit.unr-lcssa.1, label %outerloop.new.1
+; EPILOG-BLOCK:       outerloop.new.1:
+; EPILOG-BLOCK-NEXT:    br label %innerH.1
+; EPILOG-BLOCK:       innerH.1:
+; EPILOG-BLOCK-NEXT:    %i3.1 = phi i64 [ 0, %outerloop.new.1 ], [ %i4.1.1, %latch.1.1 ]
+; EPILOG-BLOCK-NEXT:    %niter.1 = phi i64 [ 100, %outerloop.new.1 ], [ %niter.nsub.1.1, %latch.1.1 ]
+; EPILOG-BLOCK-NEXT:    %i4.11 = add nuw nsw i64 %i3.1, 1
+; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.13
+; EPILOG-BLOCK:       latch.13:
+; EPILOG-BLOCK-NEXT:    %niter.nsub.12 = sub i64 %niter.1, 1
+; EPILOG-BLOCK-NEXT:    %i4.1.1 = add nuw nsw i64 %i4.11, 1
+; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.1.1
+; EPILOG-BLOCK:       latch.1.1:
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1.1 = sub i64 %niter.nsub.12, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1.1 = icmp ne i64 %niter.nsub.1.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1.1, label %innerH.1, label %exit.unr-lcssa.loopexit.1, !llvm.loop !11
+; EPILOG-BLOCK:       exit.unr-lcssa.loopexit.1:
+; EPILOG-BLOCK-NEXT:    br label %exit.unr-lcssa.1
+; EPILOG-BLOCK:       outerloop.loopexit.loopexit.1:
+; EPILOG-BLOCK-NEXT:    br label %outerloop.loopexit.1
+; EPILOG-BLOCK:       exit.unr-lcssa.1:
+; EPILOG-BLOCK-NEXT:    br i1 false, label %innerH.epil.preheader.1, label %exit.loopexit
+; EPILOG-BLOCK:       innerH.epil.preheader.1:
+; EPILOG-BLOCK-NEXT:    br label %innerH.epil.1
+; EPILOG-BLOCK:       innerH.epil.1:
+; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.1, label %latch.epil
+; EPILOG-BLOCK:       outerloop.loopexit.1:
+; EPILOG-BLOCK-NEXT:    br label %outerloop, !llvm.loop !12
 ; EPILOG-BLOCK:       outerloop:
 ; EPILOG-BLOCK-NEXT:    %i = phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit.1 ]
 ; EPILOG-BLOCK-NEXT:    %0 = sub i64 100, %i
@@ -4383,6 +4410,10 @@ define void @test8() {
 ; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    %i4.1 = add nuw nsw i64 %i4, 1
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.1
+; EPILOG-BLOCK:       latch.1:
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %innerH, label %exit.unr-lcssa.loopexit, !llvm.loop !11
 ; EPILOG-BLOCK:       exit.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit.unr-lcssa
 ; EPILOG-BLOCK:       exit.unr-lcssa:
@@ -4398,37 +4429,6 @@ define void @test8() {
 ; EPILOG-BLOCK-NEXT:    br label %exit
 ; EPILOG-BLOCK:       exit:
 ; EPILOG-BLOCK-NEXT:    ret void
-; EPILOG-BLOCK:       latch.1:
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %innerH, label %exit.unr-lcssa.loopexit, !llvm.loop !11
-; EPILOG-BLOCK:       outerloop.new.1:
-; EPILOG-BLOCK-NEXT:    br label %innerH.1
-; EPILOG-BLOCK:       innerH.1:
-; EPILOG-BLOCK-NEXT:    %i3.1 = phi i64 [ 0, %outerloop.new.1 ], [ %i4.1.1, %latch.1.1 ]
-; EPILOG-BLOCK-NEXT:    %niter.1 = phi i64 [ 100, %outerloop.new.1 ], [ %niter.nsub.1.1, %latch.1.1 ]
-; EPILOG-BLOCK-NEXT:    %i4.11 = add nuw nsw i64 %i3.1, 1
-; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.13
-; EPILOG-BLOCK:       latch.13:
-; EPILOG-BLOCK-NEXT:    %niter.nsub.12 = sub i64 %niter.1, 1
-; EPILOG-BLOCK-NEXT:    %i4.1.1 = add nuw nsw i64 %i4.11, 1
-; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.1.1
-; EPILOG-BLOCK:       latch.1.1:
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1.1 = sub i64 %niter.nsub.12, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1.1 = icmp ne i64 %niter.nsub.1.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1.1, label %innerH.1, label %exit.unr-lcssa.loopexit.1, !llvm.loop !11
-; EPILOG-BLOCK:       exit.unr-lcssa.loopexit.1:
-; EPILOG-BLOCK-NEXT:    br label %exit.unr-lcssa.1
-; EPILOG-BLOCK:       outerloop.loopexit.loopexit.1:
-; EPILOG-BLOCK-NEXT:    br label %outerloop.loopexit.1
-; EPILOG-BLOCK:       exit.unr-lcssa.1:
-; EPILOG-BLOCK-NEXT:    br i1 false, label %innerH.epil.preheader.1, label %exit.loopexit
-; EPILOG-BLOCK:       innerH.epil.preheader.1:
-; EPILOG-BLOCK-NEXT:    br label %innerH.epil.1
-; EPILOG-BLOCK:       innerH.epil.1:
-; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.1, label %latch.epil
-; EPILOG-BLOCK:       outerloop.loopexit.1:
-; EPILOG-BLOCK-NEXT:    br label %outerloop, !llvm.loop !12
 ;
 ; PROLOG-LABEL: @test8(
 ; PROLOG-NEXT:  bb:
@@ -4474,12 +4474,6 @@ define void @test8() {
 ; PROLOG:       latch:
 ; PROLOG-NEXT:    %i4.1 = add nuw nsw i64 %i4, 1
 ; PROLOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.1
-; PROLOG:       exit.unr-lcssa:
-; PROLOG-NEXT:    br label %exit
-; PROLOG:       exit.loopexit:
-; PROLOG-NEXT:    br label %exit
-; PROLOG:       exit:
-; PROLOG-NEXT:    ret void
 ; PROLOG:       latch.1:
 ; PROLOG-NEXT:    %i4.2 = add nuw nsw i64 %i4.1, 1
 ; PROLOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.2
@@ -4501,6 +4495,12 @@ define void @test8() {
 ; PROLOG:       latch.7:
 ; PROLOG-NEXT:    %i6.7 = icmp ult i64 %i4.7, 100
 ; PROLOG-NEXT:    br i1 %i6.7, label %innerH, label %exit.unr-lcssa
+; PROLOG:       exit.unr-lcssa:
+; PROLOG-NEXT:    br label %exit
+; PROLOG:       exit.loopexit:
+; PROLOG-NEXT:    br label %exit
+; PROLOG:       exit:
+; PROLOG-NEXT:    ret void
 ;
 ; PROLOG-BLOCK-LABEL: @test8(
 ; PROLOG-BLOCK-NEXT:  bb:
@@ -4509,6 +4509,31 @@ define void @test8() {
 ; PROLOG-BLOCK-NEXT:    br label %outerloop.loopexit
 ; PROLOG-BLOCK:       outerloop.loopexit:
 ; PROLOG-BLOCK-NEXT:    br i1 false, label %innerH.prol.preheader.1, label %innerH.prol.loopexit.1
+; PROLOG-BLOCK:       innerH.prol.preheader.1:
+; PROLOG-BLOCK-NEXT:    br label %innerH.prol.1
+; PROLOG-BLOCK:       innerH.prol.1:
+; PROLOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.1, label %latch.prol.1
+; PROLOG-BLOCK:       latch.prol.1:
+; PROLOG-BLOCK-NEXT:    br label %innerH.prol.loopexit.1
+; PROLOG-BLOCK:       innerH.prol.loopexit.1:
+; PROLOG-BLOCK-NEXT:    %i3.unr.1 = phi i64 [ 0, %outerloop.loopexit ], [ 1, %latch.prol.1 ]
+; PROLOG-BLOCK-NEXT:    br i1 false, label %exit.loopexit, label %outerloop.new.1
+; PROLOG-BLOCK:       outerloop.new.1:
+; PROLOG-BLOCK-NEXT:    br label %innerH.1
+; PROLOG-BLOCK:       innerH.1:
+; PROLOG-BLOCK-NEXT:    %i3.1 = phi i64 [ %i3.unr.1, %outerloop.new.1 ], [ %i4.1.1, %latch.1.1 ]
+; PROLOG-BLOCK-NEXT:    %i4.11 = add nuw nsw i64 %i3.1, 1
+; PROLOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.12
+; PROLOG-BLOCK:       latch.12:
+; PROLOG-BLOCK-NEXT:    %i4.1.1 = add nuw nsw i64 %i4.11, 1
+; PROLOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.1.1
+; PROLOG-BLOCK:       latch.1.1:
+; PROLOG-BLOCK-NEXT:    %i6.1.1 = icmp ult i64 %i4.1.1, 100
+; PROLOG-BLOCK-NEXT:    br i1 %i6.1.1, label %innerH.1, label %exit.unr-lcssa.loopexit3, !llvm.loop !11
+; PROLOG-BLOCK:       outerloop.loopexit.loopexit.1:
+; PROLOG-BLOCK-NEXT:    br label %outerloop.loopexit.1
+; PROLOG-BLOCK:       outerloop.loopexit.1:
+; PROLOG-BLOCK-NEXT:    br label %outerloop, !llvm.loop !12
 ; PROLOG-BLOCK:       outerloop:
 ; PROLOG-BLOCK-NEXT:    %i = phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit.1 ]
 ; PROLOG-BLOCK-NEXT:    %0 = sub i64 100, %i
@@ -4536,6 +4561,9 @@ define void @test8() {
 ; PROLOG-BLOCK:       latch:
 ; PROLOG-BLOCK-NEXT:    %i4.1 = add nuw nsw i64 %i4, 1
 ; PROLOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.1
+; PROLOG-BLOCK:       latch.1:
+; PROLOG-BLOCK-NEXT:    %i6.1 = icmp ult i64 %i4.1, 100
+; PROLOG-BLOCK-NEXT:    br i1 %i6.1, label %innerH, label %exit.unr-lcssa.loopexit, !llvm.loop !11
 ; PROLOG-BLOCK:       exit.unr-lcssa.loopexit:
 ; PROLOG-BLOCK-NEXT:    br label %exit.unr-lcssa
 ; PROLOG-BLOCK:       exit.unr-lcssa.loopexit3:
@@ -4546,34 +4574,6 @@ define void @test8() {
 ; PROLOG-BLOCK-NEXT:    br label %exit
 ; PROLOG-BLOCK:       exit:
 ; PROLOG-BLOCK-NEXT:    ret void
-; PROLOG-BLOCK:       latch.1:
-; PROLOG-BLOCK-NEXT:    %i6.1 = icmp ult i64 %i4.1, 100
-; PROLOG-BLOCK-NEXT:    br i1 %i6.1, label %innerH, label %exit.unr-lcssa.loopexit, !llvm.loop !11
-; PROLOG-BLOCK:       innerH.prol.preheader.1:
-; PROLOG-BLOCK-NEXT:    br label %innerH.prol.1
-; PROLOG-BLOCK:       innerH.prol.1:
-; PROLOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.1, label %latch.prol.1
-; PROLOG-BLOCK:       latch.prol.1:
-; PROLOG-BLOCK-NEXT:    br label %innerH.prol.loopexit.1
-; PROLOG-BLOCK:       innerH.prol.loopexit.1:
-; PROLOG-BLOCK-NEXT:    %i3.unr.1 = phi i64 [ 0, %outerloop.loopexit ], [ 1, %latch.prol.1 ]
-; PROLOG-BLOCK-NEXT:    br i1 false, label %exit.loopexit, label %outerloop.new.1
-; PROLOG-BLOCK:       outerloop.new.1:
-; PROLOG-BLOCK-NEXT:    br label %innerH.1
-; PROLOG-BLOCK:       innerH.1:
-; PROLOG-BLOCK-NEXT:    %i3.1 = phi i64 [ %i3.unr.1, %outerloop.new.1 ], [ %i4.1.1, %latch.1.1 ]
-; PROLOG-BLOCK-NEXT:    %i4.11 = add nuw nsw i64 %i3.1, 1
-; PROLOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.12
-; PROLOG-BLOCK:       latch.12:
-; PROLOG-BLOCK-NEXT:    %i4.1.1 = add nuw nsw i64 %i4.11, 1
-; PROLOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.1.1
-; PROLOG-BLOCK:       latch.1.1:
-; PROLOG-BLOCK-NEXT:    %i6.1.1 = icmp ult i64 %i4.1.1, 100
-; PROLOG-BLOCK-NEXT:    br i1 %i6.1.1, label %innerH.1, label %exit.unr-lcssa.loopexit3, !llvm.loop !11
-; PROLOG-BLOCK:       outerloop.loopexit.loopexit.1:
-; PROLOG-BLOCK-NEXT:    br label %outerloop.loopexit.1
-; PROLOG-BLOCK:       outerloop.loopexit.1:
-; PROLOG-BLOCK-NEXT:    br label %outerloop, !llvm.loop !12
 ;
 
 bb:
@@ -4636,33 +4636,6 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; EPILOG-NEXT:    %iv.next = add nuw nsw i64 %phi, 1
 ; EPILOG-NEXT:    %niter.nsub = sub i32 %niter, 1
 ; EPILOG-NEXT:    br i1 true, label %latch.1, label %innerexit.loopexit
-; EPILOG:       outerLatch.loopexit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %phi.unr.ph = phi i64 [ %iv.next.7, %latch.7 ]
-; EPILOG-NEXT:    br label %outerLatch.loopexit.unr-lcssa
-; EPILOG:       outerLatch.loopexit.unr-lcssa:
-; EPILOG-NEXT:    %phi.unr = phi i64 [ %i4, %preheader ], [ %phi.unr.ph, %outerLatch.loopexit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %lcmp.mod = icmp ne i32 %xtraiter, 0
-; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %outerLatch.loopexit
-; EPILOG:       header.epil.preheader:
-; EPILOG-NEXT:    br label %header.epil
-; EPILOG:       header.epil:
-; EPILOG-NEXT:    %phi.epil = phi i64 [ %phi.unr, %header.epil.preheader ], [ %iv.next.epil, %latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i32 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
-; EPILOG-NEXT:    %i7.epil = trunc i64 %phi.epil to i32
-; EPILOG-NEXT:    br i1 true, label %latch.epil, label %innerexit.loopexit1
-; EPILOG:       latch.epil:
-; EPILOG-NEXT:    %i11.epil = add nsw i32 %i7.epil, 1
-; EPILOG-NEXT:    %innercnd.epil = icmp slt i32 %i11.epil, %trip
-; EPILOG-NEXT:    %iv.next.epil = add nuw nsw i64 %phi.epil, 1
-; EPILOG-NEXT:    %epil.iter.sub = sub i32 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i32 %epil.iter.sub, 0
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %outerLatch.loopexit.epilog-lcssa, !llvm.loop !12
-; EPILOG:       outerLatch.loopexit.epilog-lcssa:
-; EPILOG-NEXT:    br label %outerLatch.loopexit
-; EPILOG:       outerLatch.loopexit:
-; EPILOG-NEXT:    br label %outerLatch
-; EPILOG:       outerLatch:
-; EPILOG-NEXT:    br label %outerloopHdr
 ; EPILOG:       latch.1:
 ; EPILOG-NEXT:    %iv.next.1 = add nuw nsw i64 %iv.next, 1
 ; EPILOG-NEXT:    %niter.nsub.1 = sub i32 %niter.nsub, 1
@@ -4692,6 +4665,33 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; EPILOG-NEXT:    %niter.nsub.7 = sub i32 %niter.nsub.6, 1
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i32 %niter.nsub.7, 0
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %header, label %outerLatch.loopexit.unr-lcssa.loopexit
+; EPILOG:       outerLatch.loopexit.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %phi.unr.ph = phi i64 [ %iv.next.7, %latch.7 ]
+; EPILOG-NEXT:    br label %outerLatch.loopexit.unr-lcssa
+; EPILOG:       outerLatch.loopexit.unr-lcssa:
+; EPILOG-NEXT:    %phi.unr = phi i64 [ %i4, %preheader ], [ %phi.unr.ph, %outerLatch.loopexit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i32 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %outerLatch.loopexit
+; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    br label %header.epil
+; EPILOG:       header.epil:
+; EPILOG-NEXT:    %phi.epil = phi i64 [ %phi.unr, %header.epil.preheader ], [ %iv.next.epil, %latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i32 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    %i7.epil = trunc i64 %phi.epil to i32
+; EPILOG-NEXT:    br i1 true, label %latch.epil, label %innerexit.loopexit1
+; EPILOG:       latch.epil:
+; EPILOG-NEXT:    %i11.epil = add nsw i32 %i7.epil, 1
+; EPILOG-NEXT:    %innercnd.epil = icmp slt i32 %i11.epil, %trip
+; EPILOG-NEXT:    %iv.next.epil = add nuw nsw i64 %phi.epil, 1
+; EPILOG-NEXT:    %epil.iter.sub = sub i32 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i32 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %outerLatch.loopexit.epilog-lcssa, !llvm.loop !12
+; EPILOG:       outerLatch.loopexit.epilog-lcssa:
+; EPILOG-NEXT:    br label %outerLatch.loopexit
+; EPILOG:       outerLatch.loopexit:
+; EPILOG-NEXT:    br label %outerLatch
+; EPILOG:       outerLatch:
+; EPILOG-NEXT:    br label %outerloopHdr
 ;
 ; EPILOG-BLOCK-LABEL: @test9(
 ; EPILOG-BLOCK-NEXT:  bb:
@@ -4733,6 +4733,11 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; EPILOG-BLOCK-NEXT:    %iv.next = add nuw nsw i64 %phi, 1
 ; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i32 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.1, label %innerexit.loopexit.loopexit
+; EPILOG-BLOCK:       latch.1:
+; EPILOG-BLOCK-NEXT:    %iv.next.1 = add nuw nsw i64 %iv.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i32 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i32 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %header, label %outerLatch.loopexit.unr-lcssa.loopexit, !llvm.loop !13
 ; EPILOG-BLOCK:       outerLatch.loopexit.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %outerLatch.loopexit.unr-lcssa
 ; EPILOG-BLOCK:       outerLatch.loopexit.unr-lcssa:
@@ -4750,11 +4755,6 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; EPILOG-BLOCK-NEXT:    %trip.1 = add i32 %n, -1
 ; EPILOG-BLOCK-NEXT:    %outercnd.1 = icmp slt i32 0, %trip.1
 ; EPILOG-BLOCK-NEXT:    br i1 %outercnd.1, label %preheader.1, label %outerLatch.1
-; EPILOG-BLOCK:       latch.1:
-; EPILOG-BLOCK-NEXT:    %iv.next.1 = add nuw nsw i64 %iv.next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i32 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i32 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %header, label %outerLatch.loopexit.unr-lcssa.loopexit, !llvm.loop !13
 ; EPILOG-BLOCK:       preheader.1:
 ; EPILOG-BLOCK-NEXT:    %xtraiter.1 = and i32 %0, 1
 ; EPILOG-BLOCK-NEXT:    %3 = icmp ult i32 %1, 1
@@ -4844,12 +4844,6 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; PROLOG:       latch:
 ; PROLOG-NEXT:    %iv.next = add nuw nsw i64 %phi, 1
 ; PROLOG-NEXT:    br i1 true, label %latch.1, label %innerexit.loopexit
-; PROLOG:       outerLatch.loopexit.unr-lcssa:
-; PROLOG-NEXT:    br label %outerLatch.loopexit
-; PROLOG:       outerLatch.loopexit:
-; PROLOG-NEXT:    br label %outerLatch
-; PROLOG:       outerLatch:
-; PROLOG-NEXT:    br label %outerloopHdr
 ; PROLOG:       latch.1:
 ; PROLOG-NEXT:    %iv.next.1 = add nuw nsw i64 %iv.next, 1
 ; PROLOG-NEXT:    br i1 true, label %latch.2, label %innerexit.loopexit
@@ -4874,6 +4868,12 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; PROLOG-NEXT:    %innercnd.7 = icmp slt i32 %i11.7, %trip
 ; PROLOG-NEXT:    %iv.next.7 = add nuw nsw i64 %iv.next.6, 1
 ; PROLOG-NEXT:    br i1 %innercnd.7, label %header, label %outerLatch.loopexit.unr-lcssa
+; PROLOG:       outerLatch.loopexit.unr-lcssa:
+; PROLOG-NEXT:    br label %outerLatch.loopexit
+; PROLOG:       outerLatch.loopexit:
+; PROLOG-NEXT:    br label %outerLatch
+; PROLOG:       outerLatch:
+; PROLOG-NEXT:    br label %outerloopHdr
 ;
 ; PROLOG-BLOCK-LABEL: @test9(
 ; PROLOG-BLOCK-NEXT:  bb:
@@ -4923,6 +4923,11 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; PROLOG-BLOCK-NEXT:    %iv.next = add nuw nsw i64 %phi, 1
 ; PROLOG-BLOCK-NEXT:    %i7.1 = trunc i64 %iv.next to i32
 ; PROLOG-BLOCK-NEXT:    br i1 true, label %latch.1, label %innerexit.loopexit.loopexit
+; PROLOG-BLOCK:       latch.1:
+; PROLOG-BLOCK-NEXT:    %i11.1 = add nsw i32 %i7.1, 1
+; PROLOG-BLOCK-NEXT:    %innercnd.1 = icmp slt i32 %i11.1, %trip
+; PROLOG-BLOCK-NEXT:    %iv.next.1 = add nuw nsw i64 %iv.next, 1
+; PROLOG-BLOCK-NEXT:    br i1 %innercnd.1, label %header, label %outerLatch.loopexit.unr-lcssa, !llvm.loop !13
 ; PROLOG-BLOCK:       outerLatch.loopexit.unr-lcssa:
 ; PROLOG-BLOCK-NEXT:    br label %outerLatch.loopexit
 ; PROLOG-BLOCK:       outerLatch.loopexit:
@@ -4931,11 +4936,6 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; PROLOG-BLOCK-NEXT:    %trip.1 = add i32 %n, -1
 ; PROLOG-BLOCK-NEXT:    %outercnd.1 = icmp slt i32 0, %trip.1
 ; PROLOG-BLOCK-NEXT:    br i1 %outercnd.1, label %preheader.1, label %outerLatch.1
-; PROLOG-BLOCK:       latch.1:
-; PROLOG-BLOCK-NEXT:    %i11.1 = add nsw i32 %i7.1, 1
-; PROLOG-BLOCK-NEXT:    %innercnd.1 = icmp slt i32 %i11.1, %trip
-; PROLOG-BLOCK-NEXT:    %iv.next.1 = add nuw nsw i64 %iv.next, 1
-; PROLOG-BLOCK-NEXT:    br i1 %innercnd.1, label %header, label %outerLatch.loopexit.unr-lcssa, !llvm.loop !13
 ; PROLOG-BLOCK:       preheader.1:
 ; PROLOG-BLOCK-NEXT:    %xtraiter.1 = and i32 %0, 1
 ; PROLOG-BLOCK-NEXT:    %lcmp.mod.1 = icmp ne i32 %xtraiter.1, 0
@@ -5013,53 +5013,22 @@ define void @test10(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    %0 = add i64 %trip, -1
 ; EPILOG-NEXT:    %xtraiter = and i64 %trip, 7
 ; EPILOG-NEXT:    %1 = icmp ult i64 %0, 7
-; EPILOG-NEXT:    br i1 %1, label %exit2.unr-lcssa, label %entry.new
-; EPILOG:       entry.new:
-; EPILOG-NEXT:    %unroll_iter = sub i64 %trip, %xtraiter
-; EPILOG-NEXT:    br label %loop_header
-; EPILOG:       loop_header:
-; EPILOG-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
-; EPILOG-NEXT:    call void @bar()
-; EPILOG-NEXT:    %cmp_early = icmp ne i64 %iv, %trip2
-; EPILOG-NEXT:    br i1 %cmp_early, label %loop_latch, label %exit1.loopexit
-; EPILOG:       loop_latch:
-; EPILOG-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
-; EPILOG-NEXT:    call void @bar()
-; EPILOG-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
-; EPILOG-NEXT:    br i1 %cmp_early.1, label %loop_latch.1, label %exit1.loopexit
-; EPILOG:       exit1.loopexit:
-; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1.loopexit1:
-; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1:
-; EPILOG-NEXT:    ret void
-; EPILOG:       exit2.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    br label %exit2.unr-lcssa
-; EPILOG:       exit2.unr-lcssa:
-; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
-; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2
-; EPILOG:       loop_header.epil.preheader:
-; EPILOG-NEXT:    br label %loop_header.epil
-; EPILOG:       loop_header.epil:
-; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    br i1 %1, label %exit2.unr-lcssa, label %entry.new
+; EPILOG:       entry.new:
+; EPILOG-NEXT:    %unroll_iter = sub i64 %trip, %xtraiter
+; EPILOG-NEXT:    br label %loop_header
+; EPILOG:       loop_header:
+; EPILOG-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    call void @bar()
-; EPILOG-NEXT:    %cmp_early.epil = icmp ne i64 %iv.epil, %trip2
-; EPILOG-NEXT:    br i1 %cmp_early.epil, label %loop_latch.epil, label %exit1.loopexit1
-; EPILOG:       loop_latch.epil:
-; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
-; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit2.epilog-lcssa, !llvm.loop !13
-; EPILOG:       exit2.epilog-lcssa:
-; EPILOG-NEXT:    br label %exit2
-; EPILOG:       exit2:
-; EPILOG-NEXT:    ret void
+; EPILOG-NEXT:    %cmp_early = icmp ne i64 %iv, %trip2
+; EPILOG-NEXT:    br i1 %cmp_early, label %loop_latch, label %exit1.loopexit
+; EPILOG:       loop_latch:
+; EPILOG-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
+; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    call void @bar()
+; EPILOG-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
+; EPILOG-NEXT:    br i1 %cmp_early.1, label %loop_latch.1, label %exit1.loopexit
 ; EPILOG:       loop_latch.1:
 ; EPILOG-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
 ; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
@@ -5101,6 +5070,37 @@ define void @test10(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit2.unr-lcssa.loopexit
+; EPILOG:       exit1.loopexit:
+; EPILOG-NEXT:    br label %exit1
+; EPILOG:       exit1.loopexit1:
+; EPILOG-NEXT:    br label %exit1
+; EPILOG:       exit1:
+; EPILOG-NEXT:    ret void
+; EPILOG:       exit2.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
+; EPILOG-NEXT:    br label %exit2.unr-lcssa
+; EPILOG:       exit2.unr-lcssa:
+; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2
+; EPILOG:       loop_header.epil.preheader:
+; EPILOG-NEXT:    br label %loop_header.epil
+; EPILOG:       loop_header.epil:
+; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    call void @bar()
+; EPILOG-NEXT:    %cmp_early.epil = icmp ne i64 %iv.epil, %trip2
+; EPILOG-NEXT:    br i1 %cmp_early.epil, label %loop_latch.epil, label %exit1.loopexit1
+; EPILOG:       loop_latch.epil:
+; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
+; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
+; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit2.epilog-lcssa, !llvm.loop !13
+; EPILOG:       exit2.epilog-lcssa:
+; EPILOG-NEXT:    br label %exit2
+; EPILOG:       exit2:
+; EPILOG-NEXT:    ret void
 ;
 ; EPILOG-BLOCK-LABEL: @test10(
 ; EPILOG-BLOCK-NEXT:  entry:
@@ -5123,6 +5123,11 @@ define void @test10(i64 %trip, i64 %trip2) {
 ; EPILOG-BLOCK-NEXT:    call void @bar()
 ; EPILOG-BLOCK-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp_early.1, label %loop_latch.1, label %exit1.loopexit
+; EPILOG-BLOCK:       loop_latch.1:
+; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.unr-lcssa.loopexit, !llvm.loop !15
 ; EPILOG-BLOCK:       exit1.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit1
 ; EPILOG-BLOCK:       exit1:
@@ -5144,11 +5149,6 @@ define void @test10(i64 %trip, i64 %trip2) {
 ; EPILOG-BLOCK-NEXT:    br label %exit2
 ; EPILOG-BLOCK:       exit2:
 ; EPILOG-BLOCK-NEXT:    ret void
-; EPILOG-BLOCK:       loop_latch.1:
-; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.unr-lcssa.loopexit, !llvm.loop !15
 ;
 ; PROLOG-LABEL: @test10(
 ; PROLOG-NEXT:  entry:
@@ -5189,16 +5189,6 @@ define void @test10(i64 %trip, i64 %trip2) {
 ; PROLOG-NEXT:    call void @bar()
 ; PROLOG-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; PROLOG-NEXT:    br i1 %cmp_early.1, label %loop_latch.1, label %exit1.loopexit
-; PROLOG:       exit1.loopexit:
-; PROLOG-NEXT:    br label %exit1
-; PROLOG:       exit1.loopexit1:
-; PROLOG-NEXT:    br label %exit1
-; PROLOG:       exit1:
-; PROLOG-NEXT:    ret void
-; PROLOG:       exit2.unr-lcssa:
-; PROLOG-NEXT:    br label %exit2
-; PROLOG:       exit2:
-; PROLOG-NEXT:    ret void
 ; PROLOG:       loop_latch.1:
 ; PROLOG-NEXT:    %iv_next.1 = add i64 %iv_next, 1
 ; PROLOG-NEXT:    call void @bar()
@@ -5233,6 +5223,16 @@ define void @test10(i64 %trip, i64 %trip2) {
 ; PROLOG-NEXT:    %iv_next.7 = add i64 %iv_next.6, 1
 ; PROLOG-NEXT:    %cmp.7 = icmp ne i64 %iv_next.7, %trip
 ; PROLOG-NEXT:    br i1 %cmp.7, label %loop_header, label %exit2.unr-lcssa
+; PROLOG:       exit1.loopexit:
+; PROLOG-NEXT:    br label %exit1
+; PROLOG:       exit1.loopexit1:
+; PROLOG-NEXT:    br label %exit1
+; PROLOG:       exit1:
+; PROLOG-NEXT:    ret void
+; PROLOG:       exit2.unr-lcssa:
+; PROLOG-NEXT:    br label %exit2
+; PROLOG:       exit2:
+; PROLOG-NEXT:    ret void
 ;
 ; PROLOG-BLOCK-LABEL: @test10(
 ; PROLOG-BLOCK-NEXT:  entry:
@@ -5264,6 +5264,10 @@ define void @test10(i64 %trip, i64 %trip2) {
 ; PROLOG-BLOCK-NEXT:    call void @bar()
 ; PROLOG-BLOCK-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; PROLOG-BLOCK-NEXT:    br i1 %cmp_early.1, label %loop_latch.1, label %exit1.loopexit
+; PROLOG-BLOCK:       loop_latch.1:
+; PROLOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
+; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp ne i64 %iv_next.1, %trip
+; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %loop_header, label %exit2.unr-lcssa, !llvm.loop !15
 ; PROLOG-BLOCK:       exit1.loopexit:
 ; PROLOG-BLOCK-NEXT:    br label %exit1
 ; PROLOG-BLOCK:       exit1:
@@ -5272,10 +5276,6 @@ define void @test10(i64 %trip, i64 %trip2) {
 ; PROLOG-BLOCK-NEXT:    br label %exit2
 ; PROLOG-BLOCK:       exit2:
 ; PROLOG-BLOCK-NEXT:    ret void
-; PROLOG-BLOCK:       loop_latch.1:
-; PROLOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
-; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp ne i64 %iv_next.1, %trip
-; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %loop_header, label %exit2.unr-lcssa, !llvm.loop !15
 ;
 entry:
   br label %loop_header
@@ -5320,36 +5320,6 @@ define void @test11(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.1, label %exit1.loopexit
-; EPILOG:       exit1.loopexit:
-; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1.loopexit1:
-; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1:
-; EPILOG-NEXT:    ret void
-; EPILOG:       exit2.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    br label %exit2.unr-lcssa
-; EPILOG:       exit2.unr-lcssa:
-; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
-; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2
-; EPILOG:       loop_header.epil.preheader:
-; EPILOG-NEXT:    br label %loop_header.epil
-; EPILOG:       loop_header.epil:
-; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
-; EPILOG-NEXT:    call void @bar()
-; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %exit1.loopexit1
-; EPILOG:       loop_latch.epil:
-; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
-; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit2.epilog-lcssa, !llvm.loop !14
-; EPILOG:       exit2.epilog-lcssa:
-; EPILOG-NEXT:    br label %exit2
-; EPILOG:       exit2:
-; EPILOG-NEXT:    ret void
 ; EPILOG:       loop_latch.1:
 ; EPILOG-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
 ; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
@@ -5385,6 +5355,36 @@ define void @test11(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit2.unr-lcssa.loopexit
+; EPILOG:       exit1.loopexit:
+; EPILOG-NEXT:    br label %exit1
+; EPILOG:       exit1.loopexit1:
+; EPILOG-NEXT:    br label %exit1
+; EPILOG:       exit1:
+; EPILOG-NEXT:    ret void
+; EPILOG:       exit2.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
+; EPILOG-NEXT:    br label %exit2.unr-lcssa
+; EPILOG:       exit2.unr-lcssa:
+; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2
+; EPILOG:       loop_header.epil.preheader:
+; EPILOG-NEXT:    br label %loop_header.epil
+; EPILOG:       loop_header.epil:
+; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    call void @bar()
+; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %exit1.loopexit1
+; EPILOG:       loop_latch.epil:
+; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
+; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
+; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit2.epilog-lcssa, !llvm.loop !14
+; EPILOG:       exit2.epilog-lcssa:
+; EPILOG-NEXT:    br label %exit2
+; EPILOG:       exit2:
+; EPILOG-NEXT:    ret void
 ;
 ; EPILOG-BLOCK-LABEL: @test11(
 ; EPILOG-BLOCK-NEXT:  entry:
@@ -5405,6 +5405,11 @@ define void @test11(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    call void @bar()
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.1, label %exit1.loopexit
+; EPILOG-BLOCK:       loop_latch.1:
+; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.unr-lcssa.loopexit, !llvm.loop !16
 ; EPILOG-BLOCK:       exit1.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit1
 ; EPILOG-BLOCK:       exit1:
@@ -5423,11 +5428,6 @@ define void @test11(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br label %exit2
 ; EPILOG-BLOCK:       exit2:
 ; EPILOG-BLOCK-NEXT:    ret void
-; EPILOG-BLOCK:       loop_latch.1:
-; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.unr-lcssa.loopexit, !llvm.loop !16
 ;
 ; PROLOG-LABEL: @test11(
 ; PROLOG-NEXT:  entry:
@@ -5465,16 +5465,6 @@ define void @test11(i64 %trip, i1 %cond) {
 ; PROLOG-NEXT:    %iv_next = add i64 %iv, 1
 ; PROLOG-NEXT:    call void @bar()
 ; PROLOG-NEXT:    br i1 %cond, label %loop_latch.1, label %exit1.loopexit
-; PROLOG:       exit1.loopexit:
-; PROLOG-NEXT:    br label %exit1
-; PROLOG:       exit1.loopexit1:
-; PROLOG-NEXT:    br label %exit1
-; PROLOG:       exit1:
-; PROLOG-NEXT:    ret void
-; PROLOG:       exit2.unr-lcssa:
-; PROLOG-NEXT:    br label %exit2
-; PROLOG:       exit2:
-; PROLOG-NEXT:    ret void
 ; PROLOG:       loop_latch.1:
 ; PROLOG-NEXT:    %iv_next.1 = add i64 %iv_next, 1
 ; PROLOG-NEXT:    call void @bar()
@@ -5503,6 +5493,16 @@ define void @test11(i64 %trip, i1 %cond) {
 ; PROLOG-NEXT:    %iv_next.7 = add i64 %iv_next.6, 1
 ; PROLOG-NEXT:    %cmp.7 = icmp ne i64 %iv_next.7, %trip
 ; PROLOG-NEXT:    br i1 %cmp.7, label %loop_header, label %exit2.unr-lcssa
+; PROLOG:       exit1.loopexit:
+; PROLOG-NEXT:    br label %exit1
+; PROLOG:       exit1.loopexit1:
+; PROLOG-NEXT:    br label %exit1
+; PROLOG:       exit1:
+; PROLOG-NEXT:    ret void
+; PROLOG:       exit2.unr-lcssa:
+; PROLOG-NEXT:    br label %exit2
+; PROLOG:       exit2:
+; PROLOG-NEXT:    ret void
 ;
 ; PROLOG-BLOCK-LABEL: @test11(
 ; PROLOG-BLOCK-NEXT:  entry:
@@ -5531,6 +5531,10 @@ define void @test11(i64 %trip, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    %iv_next = add i64 %iv, 1
 ; PROLOG-BLOCK-NEXT:    call void @bar()
 ; PROLOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.1, label %exit1.loopexit
+; PROLOG-BLOCK:       loop_latch.1:
+; PROLOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
+; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp ne i64 %iv_next.1, %trip
+; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %loop_header, label %exit2.unr-lcssa, !llvm.loop !16
 ; PROLOG-BLOCK:       exit1.loopexit:
 ; PROLOG-BLOCK-NEXT:    br label %exit1
 ; PROLOG-BLOCK:       exit1:
@@ -5539,10 +5543,6 @@ define void @test11(i64 %trip, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    br label %exit2
 ; PROLOG-BLOCK:       exit2:
 ; PROLOG-BLOCK-NEXT:    ret void
-; PROLOG-BLOCK:       loop_latch.1:
-; PROLOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
-; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp ne i64 %iv_next.1, %trip
-; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %loop_header, label %exit2.unr-lcssa, !llvm.loop !16
 ;
 entry:
   br label %loop_header
@@ -5590,37 +5590,6 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit
-; EPILOG:       exit1.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    br label %exit1.unr-lcssa
-; EPILOG:       exit1.unr-lcssa:
-; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit1.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
-; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit1
-; EPILOG:       loop_header.epil.preheader:
-; EPILOG-NEXT:    br label %loop_header.epil
-; EPILOG:       loop_header.epil:
-; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
-; EPILOG-NEXT:    call void @bar()
-; EPILOG-NEXT:    %cmp_early.epil = icmp ne i64 %iv.epil, %trip2
-; EPILOG-NEXT:    br i1 %cmp_early.epil, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa.loopexit1
-; EPILOG:       loop_exiting_bb2.epil:
-; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %exit1.epilog-lcssa.loopexit1
-; EPILOG:       loop_latch.epil:
-; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
-; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit1.epilog-lcssa.loopexit1, !llvm.loop !15
-; EPILOG:       exit1.epilog-lcssa.loopexit:
-; EPILOG-NEXT:    br label %exit1.epilog-lcssa
-; EPILOG:       exit1.epilog-lcssa.loopexit1:
-; EPILOG-NEXT:    br label %exit1.epilog-lcssa
-; EPILOG:       exit1.epilog-lcssa:
-; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1:
-; EPILOG-NEXT:    ret void
 ; EPILOG:       loop_exiting_bb2.1:
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.1:
@@ -5676,6 +5645,37 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit1.unr-lcssa.loopexit
+; EPILOG:       exit1.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
+; EPILOG-NEXT:    br label %exit1.unr-lcssa
+; EPILOG:       exit1.unr-lcssa:
+; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit1.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit1
+; EPILOG:       loop_header.epil.preheader:
+; EPILOG-NEXT:    br label %loop_header.epil
+; EPILOG:       loop_header.epil:
+; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    call void @bar()
+; EPILOG-NEXT:    %cmp_early.epil = icmp ne i64 %iv.epil, %trip2
+; EPILOG-NEXT:    br i1 %cmp_early.epil, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa.loopexit1
+; EPILOG:       loop_exiting_bb2.epil:
+; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %exit1.epilog-lcssa.loopexit1
+; EPILOG:       loop_latch.epil:
+; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
+; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
+; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit1.epilog-lcssa.loopexit1, !llvm.loop !15
+; EPILOG:       exit1.epilog-lcssa.loopexit:
+; EPILOG-NEXT:    br label %exit1.epilog-lcssa
+; EPILOG:       exit1.epilog-lcssa.loopexit1:
+; EPILOG-NEXT:    br label %exit1.epilog-lcssa
+; EPILOG:       exit1.epilog-lcssa:
+; EPILOG-NEXT:    br label %exit1
+; EPILOG:       exit1:
+; EPILOG-NEXT:    ret void
 ;
 ; EPILOG-BLOCK-LABEL: @test12(
 ; EPILOG-BLOCK-NEXT:  entry:
@@ -5700,6 +5700,13 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    call void @bar()
 ; EPILOG-BLOCK-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit
+; EPILOG-BLOCK:       loop_exiting_bb2.1:
+; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit
+; EPILOG-BLOCK:       loop_latch.1:
+; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit1.unr-lcssa.loopexit, !llvm.loop !17
 ; EPILOG-BLOCK:       exit1.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %exit1.unr-lcssa
@@ -5723,13 +5730,6 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br label %exit1
 ; EPILOG-BLOCK:       exit1:
 ; EPILOG-BLOCK-NEXT:    ret void
-; EPILOG-BLOCK:       loop_exiting_bb2.1:
-; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit
-; EPILOG-BLOCK:       loop_latch.1:
-; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit1.unr-lcssa.loopexit, !llvm.loop !17
 ;
 ; PROLOG-LABEL: @test12(
 ; PROLOG-NEXT:  entry:
@@ -5774,14 +5774,6 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; PROLOG-NEXT:    call void @bar()
 ; PROLOG-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; PROLOG-NEXT:    br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.unr-lcssa.loopexit
-; PROLOG:       exit1.unr-lcssa.loopexit:
-; PROLOG-NEXT:    br label %exit1.unr-lcssa
-; PROLOG:       exit1.unr-lcssa.loopexit1:
-; PROLOG-NEXT:    br label %exit1.unr-lcssa
-; PROLOG:       exit1.unr-lcssa:
-; PROLOG-NEXT:    br label %exit1
-; PROLOG:       exit1:
-; PROLOG-NEXT:    ret void
 ; PROLOG:       loop_exiting_bb2.1:
 ; PROLOG-NEXT:    br i1 %cond, label %loop_latch.1, label %exit1.unr-lcssa.loopexit
 ; PROLOG:       loop_latch.1:
@@ -5830,6 +5822,14 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; PROLOG-NEXT:    %iv_next.7 = add i64 %iv_next.6, 1
 ; PROLOG-NEXT:    %cmp.7 = icmp ne i64 %iv_next.7, %trip
 ; PROLOG-NEXT:    br i1 %cmp.7, label %loop_header, label %exit1.unr-lcssa.loopexit
+; PROLOG:       exit1.unr-lcssa.loopexit:
+; PROLOG-NEXT:    br label %exit1.unr-lcssa
+; PROLOG:       exit1.unr-lcssa.loopexit1:
+; PROLOG-NEXT:    br label %exit1.unr-lcssa
+; PROLOG:       exit1.unr-lcssa:
+; PROLOG-NEXT:    br label %exit1
+; PROLOG:       exit1:
+; PROLOG-NEXT:    ret void
 ;
 ; PROLOG-BLOCK-LABEL: @test12(
 ; PROLOG-BLOCK-NEXT:  entry:
@@ -5865,18 +5865,18 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    call void @bar()
 ; PROLOG-BLOCK-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; PROLOG-BLOCK-NEXT:    br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.unr-lcssa.loopexit
-; PROLOG-BLOCK:       exit1.unr-lcssa.loopexit:
-; PROLOG-BLOCK-NEXT:    br label %exit1.unr-lcssa
-; PROLOG-BLOCK:       exit1.unr-lcssa:
-; PROLOG-BLOCK-NEXT:    br label %exit1
-; PROLOG-BLOCK:       exit1:
-; PROLOG-BLOCK-NEXT:    ret void
 ; PROLOG-BLOCK:       loop_exiting_bb2.1:
 ; PROLOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.1, label %exit1.unr-lcssa.loopexit
 ; PROLOG-BLOCK:       loop_latch.1:
 ; PROLOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
 ; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp ne i64 %iv_next.1, %trip
 ; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %loop_header, label %exit1.unr-lcssa.loopexit, !llvm.loop !17
+; PROLOG-BLOCK:       exit1.unr-lcssa.loopexit:
+; PROLOG-BLOCK-NEXT:    br label %exit1.unr-lcssa
+; PROLOG-BLOCK:       exit1.unr-lcssa:
+; PROLOG-BLOCK-NEXT:    br label %exit1
+; PROLOG-BLOCK:       exit1:
+; PROLOG-BLOCK-NEXT:    ret void
 ;
 entry:
   br label %loop_header
@@ -5927,38 +5927,6 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit
-; EPILOG:       exit1.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    br label %exit1.unr-lcssa
-; EPILOG:       exit1.unr-lcssa:
-; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit1.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
-; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit1
-; EPILOG:       loop_header.epil.preheader:
-; EPILOG-NEXT:    br label %loop_header.epil
-; EPILOG:       loop_header.epil:
-; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
-; EPILOG-NEXT:    call void @bar()
-; EPILOG-NEXT:    %cmp_early.epil = icmp ne i64 %iv.epil, %trip2
-; EPILOG-NEXT:    br i1 %cmp_early.epil, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa.loopexit1
-; EPILOG:       loop_exiting_bb2.epil:
-; EPILOG-NEXT:    %unknown.epil = call i1 @unknown_cond()
-; EPILOG-NEXT:    br i1 %unknown.epil, label %loop_latch.epil, label %exit1.epilog-lcssa.loopexit1
-; EPILOG:       loop_latch.epil:
-; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
-; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit1.epilog-lcssa.loopexit1, !llvm.loop !16
-; EPILOG:       exit1.epilog-lcssa.loopexit:
-; EPILOG-NEXT:    br label %exit1.epilog-lcssa
-; EPILOG:       exit1.epilog-lcssa.loopexit1:
-; EPILOG-NEXT:    br label %exit1.epilog-lcssa
-; EPILOG:       exit1.epilog-lcssa:
-; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1:
-; EPILOG-NEXT:    ret void
 ; EPILOG:       loop_exiting_bb2.1:
 ; EPILOG-NEXT:    %unknown.1 = call i1 @unknown_cond()
 ; EPILOG-NEXT:    br i1 %unknown.1, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit
@@ -6021,6 +5989,38 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit1.unr-lcssa.loopexit
+; EPILOG:       exit1.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
+; EPILOG-NEXT:    br label %exit1.unr-lcssa
+; EPILOG:       exit1.unr-lcssa:
+; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit1.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit1
+; EPILOG:       loop_header.epil.preheader:
+; EPILOG-NEXT:    br label %loop_header.epil
+; EPILOG:       loop_header.epil:
+; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    call void @bar()
+; EPILOG-NEXT:    %cmp_early.epil = icmp ne i64 %iv.epil, %trip2
+; EPILOG-NEXT:    br i1 %cmp_early.epil, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa.loopexit1
+; EPILOG:       loop_exiting_bb2.epil:
+; EPILOG-NEXT:    %unknown.epil = call i1 @unknown_cond()
+; EPILOG-NEXT:    br i1 %unknown.epil, label %loop_latch.epil, label %exit1.epilog-lcssa.loopexit1
+; EPILOG:       loop_latch.epil:
+; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
+; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
+; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit1.epilog-lcssa.loopexit1, !llvm.loop !16
+; EPILOG:       exit1.epilog-lcssa.loopexit:
+; EPILOG-NEXT:    br label %exit1.epilog-lcssa
+; EPILOG:       exit1.epilog-lcssa.loopexit1:
+; EPILOG-NEXT:    br label %exit1.epilog-lcssa
+; EPILOG:       exit1.epilog-lcssa:
+; EPILOG-NEXT:    br label %exit1
+; EPILOG:       exit1:
+; EPILOG-NEXT:    ret void
 ;
 ; EPILOG-BLOCK-LABEL: @test13(
 ; EPILOG-BLOCK-NEXT:  entry:
@@ -6046,6 +6046,14 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-BLOCK-NEXT:    call void @bar()
 ; EPILOG-BLOCK-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit
+; EPILOG-BLOCK:       loop_exiting_bb2.1:
+; EPILOG-BLOCK-NEXT:    %unknown.1 = call i1 @unknown_cond()
+; EPILOG-BLOCK-NEXT:    br i1 %unknown.1, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit
+; EPILOG-BLOCK:       loop_latch.1:
+; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit1.unr-lcssa.loopexit, !llvm.loop !18
 ; EPILOG-BLOCK:       exit1.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %exit1.unr-lcssa
@@ -6070,14 +6078,6 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-BLOCK-NEXT:    br label %exit1
 ; EPILOG-BLOCK:       exit1:
 ; EPILOG-BLOCK-NEXT:    ret void
-; EPILOG-BLOCK:       loop_exiting_bb2.1:
-; EPILOG-BLOCK-NEXT:    %unknown.1 = call i1 @unknown_cond()
-; EPILOG-BLOCK-NEXT:    br i1 %unknown.1, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit
-; EPILOG-BLOCK:       loop_latch.1:
-; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit1.unr-lcssa.loopexit, !llvm.loop !18
 ;
 ; PROLOG-LABEL: @test13(
 ; PROLOG-NEXT:  entry:
@@ -6124,14 +6124,6 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; PROLOG-NEXT:    call void @bar()
 ; PROLOG-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; PROLOG-NEXT:    br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.unr-lcssa.loopexit
-; PROLOG:       exit1.unr-lcssa.loopexit:
-; PROLOG-NEXT:    br label %exit1.unr-lcssa
-; PROLOG:       exit1.unr-lcssa.loopexit1:
-; PROLOG-NEXT:    br label %exit1.unr-lcssa
-; PROLOG:       exit1.unr-lcssa:
-; PROLOG-NEXT:    br label %exit1
-; PROLOG:       exit1:
-; PROLOG-NEXT:    ret void
 ; PROLOG:       loop_exiting_bb2.1:
 ; PROLOG-NEXT:    %unknown.1 = call i1 @unknown_cond()
 ; PROLOG-NEXT:    br i1 %unknown.1, label %loop_latch.1, label %exit1.unr-lcssa.loopexit
@@ -6187,6 +6179,14 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; PROLOG-NEXT:    %iv_next.7 = add i64 %iv_next.6, 1
 ; PROLOG-NEXT:    %cmp.7 = icmp ne i64 %iv_next.7, %trip
 ; PROLOG-NEXT:    br i1 %cmp.7, label %loop_header, label %exit1.unr-lcssa.loopexit
+; PROLOG:       exit1.unr-lcssa.loopexit:
+; PROLOG-NEXT:    br label %exit1.unr-lcssa
+; PROLOG:       exit1.unr-lcssa.loopexit1:
+; PROLOG-NEXT:    br label %exit1.unr-lcssa
+; PROLOG:       exit1.unr-lcssa:
+; PROLOG-NEXT:    br label %exit1
+; PROLOG:       exit1:
+; PROLOG-NEXT:    ret void
 ;
 ; PROLOG-BLOCK-LABEL: @test13(
 ; PROLOG-BLOCK-NEXT:  entry:
@@ -6224,12 +6224,6 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; PROLOG-BLOCK-NEXT:    call void @bar()
 ; PROLOG-BLOCK-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; PROLOG-BLOCK-NEXT:    br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.unr-lcssa.loopexit
-; PROLOG-BLOCK:       exit1.unr-lcssa.loopexit:
-; PROLOG-BLOCK-NEXT:    br label %exit1.unr-lcssa
-; PROLOG-BLOCK:       exit1.unr-lcssa:
-; PROLOG-BLOCK-NEXT:    br label %exit1
-; PROLOG-BLOCK:       exit1:
-; PROLOG-BLOCK-NEXT:    ret void
 ; PROLOG-BLOCK:       loop_exiting_bb2.1:
 ; PROLOG-BLOCK-NEXT:    %unknown.1 = call i1 @unknown_cond()
 ; PROLOG-BLOCK-NEXT:    br i1 %unknown.1, label %loop_latch.1, label %exit1.unr-lcssa.loopexit
@@ -6237,6 +6231,12 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; PROLOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
 ; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp ne i64 %iv_next.1, %trip
 ; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %loop_header, label %exit1.unr-lcssa.loopexit, !llvm.loop !18
+; PROLOG-BLOCK:       exit1.unr-lcssa.loopexit:
+; PROLOG-BLOCK-NEXT:    br label %exit1.unr-lcssa
+; PROLOG-BLOCK:       exit1.unr-lcssa:
+; PROLOG-BLOCK-NEXT:    br label %exit1
+; PROLOG-BLOCK:       exit1:
+; PROLOG-BLOCK-NEXT:    ret void
 ;
 entry:
   br label %loop_header
@@ -6284,37 +6284,6 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit
-; EPILOG:       exit1.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    br label %exit1.unr-lcssa
-; EPILOG:       exit1.unr-lcssa:
-; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit1.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
-; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit1
-; EPILOG:       loop_header.epil.preheader:
-; EPILOG-NEXT:    br label %loop_header.epil
-; EPILOG:       loop_header.epil:
-; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
-; EPILOG-NEXT:    call void @bar()
-; EPILOG-NEXT:    br i1 %cond, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa.loopexit1
-; EPILOG:       loop_exiting_bb2.epil:
-; EPILOG-NEXT:    %unknown.epil = call i1 @unknown_cond()
-; EPILOG-NEXT:    br i1 %unknown.epil, label %loop_latch.epil, label %exit1.epilog-lcssa.loopexit1
-; EPILOG:       loop_latch.epil:
-; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
-; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit1.epilog-lcssa.loopexit1, !llvm.loop !17
-; EPILOG:       exit1.epilog-lcssa.loopexit:
-; EPILOG-NEXT:    br label %exit1.epilog-lcssa
-; EPILOG:       exit1.epilog-lcssa.loopexit1:
-; EPILOG-NEXT:    br label %exit1.epilog-lcssa
-; EPILOG:       exit1.epilog-lcssa:
-; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1:
-; EPILOG-NEXT:    ret void
 ; EPILOG:       loop_exiting_bb2.1:
 ; EPILOG-NEXT:    %unknown.1 = call i1 @unknown_cond()
 ; EPILOG-NEXT:    br i1 %unknown.1, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit
@@ -6371,6 +6340,37 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit1.unr-lcssa.loopexit
+; EPILOG:       exit1.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
+; EPILOG-NEXT:    br label %exit1.unr-lcssa
+; EPILOG:       exit1.unr-lcssa:
+; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit1.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit1
+; EPILOG:       loop_header.epil.preheader:
+; EPILOG-NEXT:    br label %loop_header.epil
+; EPILOG:       loop_header.epil:
+; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    call void @bar()
+; EPILOG-NEXT:    br i1 %cond, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa.loopexit1
+; EPILOG:       loop_exiting_bb2.epil:
+; EPILOG-NEXT:    %unknown.epil = call i1 @unknown_cond()
+; EPILOG-NEXT:    br i1 %unknown.epil, label %loop_latch.epil, label %exit1.epilog-lcssa.loopexit1
+; EPILOG:       loop_latch.epil:
+; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
+; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
+; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit1.epilog-lcssa.loopexit1, !llvm.loop !17
+; EPILOG:       exit1.epilog-lcssa.loopexit:
+; EPILOG-NEXT:    br label %exit1.epilog-lcssa
+; EPILOG:       exit1.epilog-lcssa.loopexit1:
+; EPILOG-NEXT:    br label %exit1.epilog-lcssa
+; EPILOG:       exit1.epilog-lcssa:
+; EPILOG-NEXT:    br label %exit1
+; EPILOG:       exit1:
+; EPILOG-NEXT:    ret void
 ;
 ; EPILOG-BLOCK-LABEL: @test14(
 ; EPILOG-BLOCK-NEXT:  entry:
@@ -6394,6 +6394,14 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    call void @bar()
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit
+; EPILOG-BLOCK:       loop_exiting_bb2.1:
+; EPILOG-BLOCK-NEXT:    %unknown.1 = call i1 @unknown_cond()
+; EPILOG-BLOCK-NEXT:    br i1 %unknown.1, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit
+; EPILOG-BLOCK:       loop_latch.1:
+; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit1.unr-lcssa.loopexit, !llvm.loop !19
 ; EPILOG-BLOCK:       exit1.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit1.unr-lcssa
 ; EPILOG-BLOCK:       exit1.unr-lcssa:
@@ -6415,14 +6423,6 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br label %exit1
 ; EPILOG-BLOCK:       exit1:
 ; EPILOG-BLOCK-NEXT:    ret void
-; EPILOG-BLOCK:       loop_exiting_bb2.1:
-; EPILOG-BLOCK-NEXT:    %unknown.1 = call i1 @unknown_cond()
-; EPILOG-BLOCK-NEXT:    br i1 %unknown.1, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit
-; EPILOG-BLOCK:       loop_latch.1:
-; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit1.unr-lcssa.loopexit, !llvm.loop !19
 ;
 ; PROLOG-LABEL: @test14(
 ; PROLOG-NEXT:  entry:
@@ -6466,14 +6466,6 @@ define void @test14(i64 %trip, i1 %cond) {
 ; PROLOG-NEXT:    %iv_next = add i64 %iv, 1
 ; PROLOG-NEXT:    call void @bar()
 ; PROLOG-NEXT:    br i1 %cond, label %loop_exiting_bb2.1, label %exit1.unr-lcssa.loopexit
-; PROLOG:       exit1.unr-lcssa.loopexit:
-; PROLOG-NEXT:    br label %exit1.unr-lcssa
-; PROLOG:       exit1.unr-lcssa.loopexit1:
-; PROLOG-NEXT:    br label %exit1.unr-lcssa
-; PROLOG:       exit1.unr-lcssa:
-; PROLOG-NEXT:    br label %exit1
-; PROLOG:       exit1:
-; PROLOG-NEXT:    ret void
 ; PROLOG:       loop_exiting_bb2.1:
 ; PROLOG-NEXT:    %unknown.1 = call i1 @unknown_cond()
 ; PROLOG-NEXT:    br i1 %unknown.1, label %loop_latch.1, label %exit1.unr-lcssa.loopexit
@@ -6523,6 +6515,14 @@ define void @test14(i64 %trip, i1 %cond) {
 ; PROLOG-NEXT:    %iv_next.7 = add i64 %iv_next.6, 1
 ; PROLOG-NEXT:    %cmp.7 = icmp ne i64 %iv_next.7, %trip
 ; PROLOG-NEXT:    br i1 %cmp.7, label %loop_header, label %exit1.unr-lcssa.loopexit
+; PROLOG:       exit1.unr-lcssa.loopexit:
+; PROLOG-NEXT:    br label %exit1.unr-lcssa
+; PROLOG:       exit1.unr-lcssa.loopexit1:
+; PROLOG-NEXT:    br label %exit1.unr-lcssa
+; PROLOG:       exit1.unr-lcssa:
+; PROLOG-NEXT:    br label %exit1
+; PROLOG:       exit1:
+; PROLOG-NEXT:    ret void
 ;
 ; PROLOG-BLOCK-LABEL: @test14(
 ; PROLOG-BLOCK-NEXT:  entry:
@@ -6557,12 +6557,6 @@ define void @test14(i64 %trip, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    %iv_next = add i64 %iv, 1
 ; PROLOG-BLOCK-NEXT:    call void @bar()
 ; PROLOG-BLOCK-NEXT:    br i1 %cond, label %loop_exiting_bb2.1, label %exit1.unr-lcssa.loopexit
-; PROLOG-BLOCK:       exit1.unr-lcssa.loopexit:
-; PROLOG-BLOCK-NEXT:    br label %exit1.unr-lcssa
-; PROLOG-BLOCK:       exit1.unr-lcssa:
-; PROLOG-BLOCK-NEXT:    br label %exit1
-; PROLOG-BLOCK:       exit1:
-; PROLOG-BLOCK-NEXT:    ret void
 ; PROLOG-BLOCK:       loop_exiting_bb2.1:
 ; PROLOG-BLOCK-NEXT:    %unknown.1 = call i1 @unknown_cond()
 ; PROLOG-BLOCK-NEXT:    br i1 %unknown.1, label %loop_latch.1, label %exit1.unr-lcssa.loopexit
@@ -6570,6 +6564,12 @@ define void @test14(i64 %trip, i1 %cond) {
 ; PROLOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
 ; PROLOG-BLOCK-NEXT:    %cmp.1 = icmp ne i64 %iv_next.1, %trip
 ; PROLOG-BLOCK-NEXT:    br i1 %cmp.1, label %loop_header, label %exit1.unr-lcssa.loopexit, !llvm.loop !19
+; PROLOG-BLOCK:       exit1.unr-lcssa.loopexit:
+; PROLOG-BLOCK-NEXT:    br label %exit1.unr-lcssa
+; PROLOG-BLOCK:       exit1.unr-lcssa:
+; PROLOG-BLOCK-NEXT:    br label %exit1
+; PROLOG-BLOCK:       exit1:
+; PROLOG-BLOCK-NEXT:    ret void
 ;
 entry:
   br label %loop_header

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
index 07fcefc621d4f..28cd885dbc443 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
@@ -19,47 +19,47 @@ define i3 @test(i3* %a, i3 %n) {
 ; UNROLL-16-NEXT:    [[TMP0:%.*]] = load i3, i3* [[A:%.*]], align 1
 ; UNROLL-16-NEXT:    [[EXITCOND:%.*]] = icmp eq i3 1, [[N]]
 ; UNROLL-16-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_1:%.*]]
-; UNROLL-16:       for.end.loopexit:
-; UNROLL-16-NEXT:    [[ADD_LCSSA:%.*]] = phi i3 [ [[TMP0]], [[FOR_BODY]] ], [ [[ADD_1:%.*]], [[FOR_BODY_1]] ], [ [[ADD_2:%.*]], [[FOR_BODY_2:%.*]] ], [ [[ADD_3:%.*]], [[FOR_BODY_3:%.*]] ], [ [[ADD_4:%.*]], [[FOR_BODY_4:%.*]] ], [ [[ADD_5:%.*]], [[FOR_BODY_5:%.*]] ], [ [[ADD_6:%.*]], [[FOR_BODY_6:%.*]] ]
-; UNROLL-16-NEXT:    br label [[FOR_END]]
-; UNROLL-16:       for.end:
-; UNROLL-16-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i3 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_END_LOOPEXIT]] ]
-; UNROLL-16-NEXT:    ret i3 [[SUM_0_LCSSA]]
 ; UNROLL-16:       for.body.1:
 ; UNROLL-16-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 1
 ; UNROLL-16-NEXT:    [[TMP1:%.*]] = load i3, i3* [[ARRAYIDX_1]], align 1
-; UNROLL-16-NEXT:    [[ADD_1]] = add nsw i3 [[TMP1]], [[TMP0]]
+; UNROLL-16-NEXT:    [[ADD_1:%.*]] = add nsw i3 [[TMP1]], [[TMP0]]
 ; UNROLL-16-NEXT:    [[EXITCOND_1:%.*]] = icmp eq i3 2, [[N]]
-; UNROLL-16-NEXT:    br i1 [[EXITCOND_1]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_2]]
+; UNROLL-16-NEXT:    br i1 [[EXITCOND_1]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_2:%.*]]
 ; UNROLL-16:       for.body.2:
 ; UNROLL-16-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 2
 ; UNROLL-16-NEXT:    [[TMP2:%.*]] = load i3, i3* [[ARRAYIDX_2]], align 1
-; UNROLL-16-NEXT:    [[ADD_2]] = add nsw i3 [[TMP2]], [[ADD_1]]
+; UNROLL-16-NEXT:    [[ADD_2:%.*]] = add nsw i3 [[TMP2]], [[ADD_1]]
 ; UNROLL-16-NEXT:    [[EXITCOND_2:%.*]] = icmp eq i3 3, [[N]]
-; UNROLL-16-NEXT:    br i1 [[EXITCOND_2]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_3]]
+; UNROLL-16-NEXT:    br i1 [[EXITCOND_2]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_3:%.*]]
 ; UNROLL-16:       for.body.3:
 ; UNROLL-16-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 3
 ; UNROLL-16-NEXT:    [[TMP3:%.*]] = load i3, i3* [[ARRAYIDX_3]], align 1
-; UNROLL-16-NEXT:    [[ADD_3]] = add nsw i3 [[TMP3]], [[ADD_2]]
+; UNROLL-16-NEXT:    [[ADD_3:%.*]] = add nsw i3 [[TMP3]], [[ADD_2]]
 ; UNROLL-16-NEXT:    [[EXITCOND_3:%.*]] = icmp eq i3 -4, [[N]]
-; UNROLL-16-NEXT:    br i1 [[EXITCOND_3]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_4]]
+; UNROLL-16-NEXT:    br i1 [[EXITCOND_3]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_4:%.*]]
 ; UNROLL-16:       for.body.4:
 ; UNROLL-16-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 4
 ; UNROLL-16-NEXT:    [[TMP4:%.*]] = load i3, i3* [[ARRAYIDX_4]], align 1
-; UNROLL-16-NEXT:    [[ADD_4]] = add nsw i3 [[TMP4]], [[ADD_3]]
+; UNROLL-16-NEXT:    [[ADD_4:%.*]] = add nsw i3 [[TMP4]], [[ADD_3]]
 ; UNROLL-16-NEXT:    [[EXITCOND_4:%.*]] = icmp eq i3 -3, [[N]]
-; UNROLL-16-NEXT:    br i1 [[EXITCOND_4]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_5]]
+; UNROLL-16-NEXT:    br i1 [[EXITCOND_4]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_5:%.*]]
 ; UNROLL-16:       for.body.5:
 ; UNROLL-16-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 5
 ; UNROLL-16-NEXT:    [[TMP5:%.*]] = load i3, i3* [[ARRAYIDX_5]], align 1
-; UNROLL-16-NEXT:    [[ADD_5]] = add nsw i3 [[TMP5]], [[ADD_4]]
+; UNROLL-16-NEXT:    [[ADD_5:%.*]] = add nsw i3 [[TMP5]], [[ADD_4]]
 ; UNROLL-16-NEXT:    [[EXITCOND_5:%.*]] = icmp eq i3 -2, [[N]]
-; UNROLL-16-NEXT:    br i1 [[EXITCOND_5]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_6]]
+; UNROLL-16-NEXT:    br i1 [[EXITCOND_5]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_6:%.*]]
 ; UNROLL-16:       for.body.6:
 ; UNROLL-16-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 6
 ; UNROLL-16-NEXT:    [[TMP6:%.*]] = load i3, i3* [[ARRAYIDX_6]], align 1
-; UNROLL-16-NEXT:    [[ADD_6]] = add nsw i3 [[TMP6]], [[ADD_5]]
+; UNROLL-16-NEXT:    [[ADD_6:%.*]] = add nsw i3 [[TMP6]], [[ADD_5]]
 ; UNROLL-16-NEXT:    br label [[FOR_END_LOOPEXIT]]
+; UNROLL-16:       for.end.loopexit:
+; UNROLL-16-NEXT:    [[ADD_LCSSA:%.*]] = phi i3 [ [[TMP0]], [[FOR_BODY]] ], [ [[ADD_1]], [[FOR_BODY_1]] ], [ [[ADD_2]], [[FOR_BODY_2]] ], [ [[ADD_3]], [[FOR_BODY_3]] ], [ [[ADD_4]], [[FOR_BODY_4]] ], [ [[ADD_5]], [[FOR_BODY_5]] ], [ [[ADD_6]], [[FOR_BODY_6]] ]
+; UNROLL-16-NEXT:    br label [[FOR_END]]
+; UNROLL-16:       for.end:
+; UNROLL-16-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i3 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_END_LOOPEXIT]] ]
+; UNROLL-16-NEXT:    ret i3 [[SUM_0_LCSSA]]
 ;
 ; UNROLL-4-LABEL: @test(
 ; UNROLL-4-NEXT:  entry:

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
index 707e5278fdd06..89aa49dd71a0c 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
@@ -30,111 +30,111 @@ define i32 @test1(i32* nocapture %a, i64 %n) {
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[SUM_02]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = or i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_1:%.*]]
-; CHECK:       latchexit.unr-lcssa.loopexit:
-; CHECK-NEXT:    br label [[LATCHEXIT_UNR_LCSSA]]
-; CHECK:       latchexit.unr-lcssa:
-; CHECK-NEXT:    [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]] ]
-; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[LATCHEXIT:%.*]], label [[HEADER_EPIL_PREHEADER:%.*]]
-; CHECK:       header.epil.preheader:
-; CHECK-NEXT:    br label [[HEADER_EPIL:%.*]]
-; CHECK:       header.epil:
-; CHECK-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
-; CHECK-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
-; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[LATCH_EPIL]] ], [ [[XTRAITER]], [[HEADER_EPIL_PREHEADER]] ]
-; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
-; CHECK:       for.exiting_block.epil:
-; CHECK-NEXT:    [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42
-; CHECK-NEXT:    br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]]
-; CHECK:       latch.epil:
-; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_EPIL]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4
-; CHECK-NEXT:    [[ADD_EPIL]] = add nsw i32 [[TMP3]], [[SUM_02_EPIL]]
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
-; CHECK-NEXT:    [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1
-; CHECK-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0
-; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], label [[HEADER_EPIL]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK:       latchexit.epilog-lcssa:
-; CHECK-NEXT:    br label [[LATCHEXIT]]
-; CHECK:       latchexit:
-; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL]], [[LATCHEXIT_EPILOG_LCSSA]] ]
-; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
-; CHECK:       otherexit.loopexit:
-; CHECK-NEXT:    [[SUM_02_LCSSA_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1:%.*]], [[FOR_EXITING_BLOCK_2:%.*]] ], [ [[ADD_2:%.*]], [[FOR_EXITING_BLOCK_3:%.*]] ], [ [[ADD_3:%.*]], [[FOR_EXITING_BLOCK_4:%.*]] ], [ [[ADD_4:%.*]], [[FOR_EXITING_BLOCK_5:%.*]] ], [ [[ADD_5:%.*]], [[FOR_EXITING_BLOCK_6:%.*]] ], [ [[ADD_6:%.*]], [[FOR_EXITING_BLOCK_7:%.*]] ]
-; CHECK-NEXT:    br label [[OTHEREXIT:%.*]]
-; CHECK:       otherexit.loopexit3:
-; CHECK-NEXT:    br label [[OTHEREXIT]]
-; CHECK:       otherexit:
-; CHECK-NEXT:    [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02_LCSSA_PH]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_EPIL]], [[OTHEREXIT_LOOPEXIT3]] ]
-; CHECK-NEXT:    [[RVAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
-; CHECK-NEXT:    ret i32 [[RVAL]]
 ; CHECK:       for.exiting_block.1:
 ; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[N]], 42
 ; CHECK-NEXT:    br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
 ; CHECK:       latch.1:
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
-; CHECK-NEXT:    [[ADD_1]] = add nsw i32 [[TMP4]], [[ADD]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[TMP3]], [[ADD]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = or i64 [[INDVARS_IV]], 2
-; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_2]]
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_2:%.*]]
 ; CHECK:       for.exiting_block.2:
 ; CHECK-NEXT:    [[CMP_2:%.*]] = icmp eq i64 [[N]], 42
 ; CHECK-NEXT:    br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
 ; CHECK:       latch.2:
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
-; CHECK-NEXT:    [[ADD_2]] = add nsw i32 [[TMP5]], [[ADD_1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[TMP4]], [[ADD_1]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = or i64 [[INDVARS_IV]], 3
-; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_3]]
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_3:%.*]]
 ; CHECK:       for.exiting_block.3:
 ; CHECK-NEXT:    [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
 ; CHECK-NEXT:    br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
 ; CHECK:       latch.3:
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
-; CHECK-NEXT:    [[ADD_3]] = add nsw i32 [[TMP6]], [[ADD_2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[ADD_3:%.*]] = add nsw i32 [[TMP5]], [[ADD_2]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = or i64 [[INDVARS_IV]], 4
-; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_4]]
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_4:%.*]]
 ; CHECK:       for.exiting_block.4:
 ; CHECK-NEXT:    [[CMP_4:%.*]] = icmp eq i64 [[N]], 42
 ; CHECK-NEXT:    br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
 ; CHECK:       latch.4:
 ; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
-; CHECK-NEXT:    [[ADD_4]] = add nsw i32 [[TMP7]], [[ADD_3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
+; CHECK-NEXT:    [[ADD_4:%.*]] = add nsw i32 [[TMP6]], [[ADD_3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = or i64 [[INDVARS_IV]], 5
-; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_5]]
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_5:%.*]]
 ; CHECK:       for.exiting_block.5:
 ; CHECK-NEXT:    [[CMP_5:%.*]] = icmp eq i64 [[N]], 42
 ; CHECK-NEXT:    br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
 ; CHECK:       latch.5:
 ; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]]
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
-; CHECK-NEXT:    [[ADD_5]] = add nsw i32 [[TMP8]], [[ADD_4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
+; CHECK-NEXT:    [[ADD_5:%.*]] = add nsw i32 [[TMP7]], [[ADD_4]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = or i64 [[INDVARS_IV]], 6
-; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_6]]
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_6:%.*]]
 ; CHECK:       for.exiting_block.6:
 ; CHECK-NEXT:    [[CMP_6:%.*]] = icmp eq i64 [[N]], 42
 ; CHECK-NEXT:    br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
 ; CHECK:       latch.6:
 ; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
-; CHECK-NEXT:    [[ADD_6]] = add nsw i32 [[TMP9]], [[ADD_5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
+; CHECK-NEXT:    [[ADD_6:%.*]] = add nsw i32 [[TMP8]], [[ADD_5]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = or i64 [[INDVARS_IV]], 7
-; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_7]]
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_7:%.*]]
 ; CHECK:       for.exiting_block.7:
 ; CHECK-NEXT:    [[CMP_7:%.*]] = icmp eq i64 [[N]], 42
 ; CHECK-NEXT:    br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
 ; CHECK:       latch.7:
 ; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_6]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
-; CHECK-NEXT:    [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
+; CHECK-NEXT:    [[ADD_7]] = add nsw i32 [[TMP9]], [[ADD_6]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
 ; CHECK-NEXT:    [[NITER_NSUB_7]] = add i64 [[NITER]], -8
 ; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0
-; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]], label [[HEADER]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
+; CHECK:       latchexit.unr-lcssa.loopexit:
+; CHECK-NEXT:    br label [[LATCHEXIT_UNR_LCSSA]]
+; CHECK:       latchexit.unr-lcssa:
+; CHECK-NEXT:    [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[LATCHEXIT:%.*]], label [[HEADER_EPIL_PREHEADER:%.*]]
+; CHECK:       header.epil.preheader:
+; CHECK-NEXT:    br label [[HEADER_EPIL:%.*]]
+; CHECK:       header.epil:
+; CHECK-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[LATCH_EPIL]] ], [ [[XTRAITER]], [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
+; CHECK:       for.exiting_block.epil:
+; CHECK-NEXT:    [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT:    br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]]
+; CHECK:       latch.epil:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_EPIL]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT:    [[ADD_EPIL]] = add nsw i32 [[TMP10]], [[SUM_02_EPIL]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
+; CHECK-NEXT:    [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1
+; CHECK-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], label [[HEADER_EPIL]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       latchexit.epilog-lcssa:
+; CHECK-NEXT:    br label [[LATCHEXIT]]
+; CHECK:       latchexit:
+; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL]], [[LATCHEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
+; CHECK:       otherexit.loopexit:
+; CHECK-NEXT:    [[SUM_02_LCSSA_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1]], [[FOR_EXITING_BLOCK_2]] ], [ [[ADD_2]], [[FOR_EXITING_BLOCK_3]] ], [ [[ADD_3]], [[FOR_EXITING_BLOCK_4]] ], [ [[ADD_4]], [[FOR_EXITING_BLOCK_5]] ], [ [[ADD_5]], [[FOR_EXITING_BLOCK_6]] ], [ [[ADD_6]], [[FOR_EXITING_BLOCK_7]] ]
+; CHECK-NEXT:    br label [[OTHEREXIT:%.*]]
+; CHECK:       otherexit.loopexit3:
+; CHECK-NEXT:    br label [[OTHEREXIT]]
+; CHECK:       otherexit:
+; CHECK-NEXT:    [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02_LCSSA_PH]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_EPIL]], [[OTHEREXIT_LOOPEXIT3]] ]
+; CHECK-NEXT:    [[RVAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
+; CHECK-NEXT:    ret i32 [[RVAL]]
 ;
 ; NOUNROLL-LABEL: @test1(
 ; NOUNROLL-NEXT:  entry:

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll b/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll
index 02e86de97e99f..766a85840634e 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll
@@ -107,10 +107,6 @@ define dso_local void @hoge_5(i8 %arg) {
 ; UPPER-NEXT:    store i8 [[ARG:%.*]], i8* [[PTR_NEXT]], align 1
 ; UPPER-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[IV_NEXT]], 17
 ; UPPER-NEXT:    br i1 [[TMP1]], label [[LOOP_1:%.*]], label [[EXIT_LOOPEXIT:%.*]]
-; UPPER:       exit.loopexit:
-; UPPER-NEXT:    br label [[EXIT]]
-; UPPER:       exit:
-; UPPER-NEXT:    ret void
 ; UPPER:       loop.1:
 ; UPPER-NEXT:    [[IV_NEXT_1:%.*]] = add nuw i32 [[IV_NEXT]], 4
 ; UPPER-NEXT:    [[PTR_NEXT_1:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT]], i32 1
@@ -139,6 +135,10 @@ define dso_local void @hoge_5(i8 %arg) {
 ; UPPER-NEXT:    [[PTR_NEXT_5:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_4]], i32 1
 ; UPPER-NEXT:    store i8 [[ARG]], i8* [[PTR_NEXT_5]], align 1
 ; UPPER-NEXT:    br label [[EXIT_LOOPEXIT]]
+; UPPER:       exit.loopexit:
+; UPPER-NEXT:    br label [[EXIT]]
+; UPPER:       exit:
+; UPPER-NEXT:    ret void
 ;
 entry:
   %x = load i32, i32* @global, align 4

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
index da3b4ecba0ee6..f0cbf13fba806 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
@@ -36,8 +36,26 @@ define i32 @unroll(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 1
 ; CHECK-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 1
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA:%.*]], label [[FOR_BODY_EPIL_1:%.*]]
+; CHECK:       for.body.epil.1:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_EPIL]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_EPIL]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_1]], align 4
+; CHECK-NEXT:    [[MUL_EPIL_1:%.*]] = mul nsw i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    [[ADD_EPIL_1:%.*]] = add nsw i32 [[MUL_EPIL_1]], [[ADD_EPIL]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 2
+; CHECK-NEXT:    [[EPIL_ITER_CMP_1_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 2
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]], label [[FOR_BODY_EPIL_2:%.*]]
+; CHECK:       for.body.epil.2:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_2]], align 4
+; CHECK-NEXT:    [[MUL_EPIL_2:%.*]] = mul nsw i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT:    [[ADD_EPIL_2:%.*]] = add nsw i32 [[MUL_EPIL_2]], [[ADD_EPIL_1]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
 ; CHECK:       for.cond.cleanup.loopexit.epilog-lcssa:
-; CHECK-NEXT:    [[ADD_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ], [ [[ADD_EPIL_1:%.*]], [[FOR_BODY_EPIL_1]] ], [ [[ADD_EPIL_2:%.*]], [[FOR_BODY_EPIL_2:%.*]] ]
+; CHECK-NEXT:    [[ADD_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ], [ [[ADD_EPIL_1]], [[FOR_BODY_EPIL_1]] ], [ [[ADD_EPIL_2]], [[FOR_BODY_EPIL_2]] ]
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT]]
 ; CHECK:       for.cond.cleanup.loopexit:
 ; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_LCSSA_PH1]], [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ]
@@ -50,54 +68,36 @@ define i32 @unroll(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %
 ; CHECK-NEXT:    [[C_010:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP9]], [[TMP8]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[MUL]], [[C_010]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = or i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
-; CHECK-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[TMP11]], [[TMP10]]
 ; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[MUL_1]], [[ADD]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = or i64 [[INDVARS_IV]], 2
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]]
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_1]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4
-; CHECK-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[TMP9]], [[TMP8]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[TMP13]], [[TMP12]]
 ; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[MUL_2]], [[ADD_1]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = or i64 [[INDVARS_IV]], 3
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX2_3]], align 4
-; CHECK-NEXT:    [[MUL_3:%.*]] = mul nsw i32 [[TMP11]], [[TMP10]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT:    [[MUL_3:%.*]] = mul nsw i32 [[TMP15]], [[TMP14]]
 ; CHECK-NEXT:    [[ADD_3]] = add nsw i32 [[MUL_3]], [[ADD_2]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
 ; CHECK-NEXT:    [[NITER_NSUB_3]] = add i64 [[NITER]], -4
 ; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NSUB_3]], 0
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK:       for.body.epil.1:
-; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_EPIL]]
-; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_EPIL]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_1]], align 4
-; CHECK-NEXT:    [[MUL_EPIL_1:%.*]] = mul nsw i32 [[TMP13]], [[TMP12]]
-; CHECK-NEXT:    [[ADD_EPIL_1]] = add nsw i32 [[MUL_EPIL_1]], [[ADD_EPIL]]
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 2
-; CHECK-NEXT:    [[EPIL_ITER_CMP_1_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 2
-; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]], label [[FOR_BODY_EPIL_2]]
-; CHECK:       for.body.epil.2:
-; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
-; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_2]], align 4
-; CHECK-NEXT:    [[MUL_EPIL_2:%.*]] = mul nsw i32 [[TMP15]], [[TMP14]]
-; CHECK-NEXT:    [[ADD_EPIL_2]] = add nsw i32 [[MUL_EPIL_2]], [[ADD_EPIL_1]]
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
 ;
 entry:
   %cmp9 = icmp eq i32 %N, 0

diff  --git a/llvm/test/Transforms/LoopUnroll/scevunroll.ll b/llvm/test/Transforms/LoopUnroll/scevunroll.ll
index 87e582a27293e..70c8d29e4a826 100644
--- a/llvm/test/Transforms/LoopUnroll/scevunroll.ll
+++ b/llvm/test/Transforms/LoopUnroll/scevunroll.ll
@@ -71,35 +71,35 @@ define i64 @earlyLoopTest(i64* %base) nounwind {
 ; CHECK:       tail:
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i64 [[VAL]], 0
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[LOOP_1:%.*]], label [[EXIT2:%.*]]
-; CHECK:       exit1:
-; CHECK-NEXT:    [[S_LCSSA:%.*]] = phi i64 [ [[S_NEXT_2:%.*]], [[LOOP_3:%.*]] ]
-; CHECK-NEXT:    ret i64 [[S_LCSSA]]
-; CHECK:       exit2:
-; CHECK-NEXT:    [[S_NEXT_LCSSA1:%.*]] = phi i64 [ [[VAL]], [[TAIL]] ], [ [[S_NEXT_1:%.*]], [[TAIL_1:%.*]] ], [ [[S_NEXT_2]], [[TAIL_2:%.*]] ], [ [[S_NEXT_3:%.*]], [[TAIL_3:%.*]] ]
-; CHECK-NEXT:    ret i64 [[S_NEXT_LCSSA1]]
 ; CHECK:       loop.1:
 ; CHECK-NEXT:    [[ADR_1:%.*]] = getelementptr i64, i64* [[BASE]], i64 1
 ; CHECK-NEXT:    [[VAL_1:%.*]] = load i64, i64* [[ADR_1]], align 4
-; CHECK-NEXT:    [[S_NEXT_1]] = add i64 [[VAL]], [[VAL_1]]
-; CHECK-NEXT:    br label [[TAIL_1]]
+; CHECK-NEXT:    [[S_NEXT_1:%.*]] = add i64 [[VAL]], [[VAL_1]]
+; CHECK-NEXT:    br label [[TAIL_1:%.*]]
 ; CHECK:       tail.1:
 ; CHECK-NEXT:    [[CMP2_1:%.*]] = icmp ne i64 [[VAL_1]], 0
 ; CHECK-NEXT:    br i1 [[CMP2_1]], label [[LOOP_2:%.*]], label [[EXIT2]]
 ; CHECK:       loop.2:
 ; CHECK-NEXT:    [[ADR_2:%.*]] = getelementptr i64, i64* [[BASE]], i64 2
 ; CHECK-NEXT:    [[VAL_2:%.*]] = load i64, i64* [[ADR_2]], align 4
-; CHECK-NEXT:    [[S_NEXT_2]] = add i64 [[S_NEXT_1]], [[VAL_2]]
-; CHECK-NEXT:    br label [[TAIL_2]]
+; CHECK-NEXT:    [[S_NEXT_2:%.*]] = add i64 [[S_NEXT_1]], [[VAL_2]]
+; CHECK-NEXT:    br label [[TAIL_2:%.*]]
 ; CHECK:       tail.2:
 ; CHECK-NEXT:    [[CMP2_2:%.*]] = icmp ne i64 [[VAL_2]], 0
-; CHECK-NEXT:    br i1 [[CMP2_2]], label [[LOOP_3]], label [[EXIT2]]
+; CHECK-NEXT:    br i1 [[CMP2_2]], label [[LOOP_3:%.*]], label [[EXIT2]]
 ; CHECK:       loop.3:
 ; CHECK-NEXT:    [[ADR_3:%.*]] = getelementptr i64, i64* [[BASE]], i64 3
 ; CHECK-NEXT:    [[VAL_3:%.*]] = load i64, i64* [[ADR_3]], align 4
-; CHECK-NEXT:    [[S_NEXT_3]] = add i64 [[S_NEXT_2]], [[VAL_3]]
-; CHECK-NEXT:    br i1 false, label [[TAIL_3]], label [[EXIT1:%.*]]
+; CHECK-NEXT:    [[S_NEXT_3:%.*]] = add i64 [[S_NEXT_2]], [[VAL_3]]
+; CHECK-NEXT:    br i1 false, label [[TAIL_3:%.*]], label [[EXIT1:%.*]]
 ; CHECK:       tail.3:
 ; CHECK-NEXT:    br label [[EXIT2]]
+; CHECK:       exit1:
+; CHECK-NEXT:    [[S_LCSSA:%.*]] = phi i64 [ [[S_NEXT_2]], [[LOOP_3]] ]
+; CHECK-NEXT:    ret i64 [[S_LCSSA]]
+; CHECK:       exit2:
+; CHECK-NEXT:    [[S_NEXT_LCSSA1:%.*]] = phi i64 [ [[VAL]], [[TAIL]] ], [ [[S_NEXT_1]], [[TAIL_1]] ], [ [[S_NEXT_2]], [[TAIL_2]] ], [ [[S_NEXT_3]], [[TAIL_3]] ]
+; CHECK-NEXT:    ret i64 [[S_NEXT_LCSSA1]]
 ;
 entry:
   br label %loop
@@ -174,12 +174,6 @@ define i32 @multiExitIncomplete(i32* %base) nounwind {
 ; CHECK:       l3:
 ; CHECK-NEXT:    [[CMP3:%.*]] = icmp ne i32 [[VAL]], 0
 ; CHECK-NEXT:    br i1 [[CMP3]], label [[L1_1:%.*]], label [[EXIT3:%.*]]
-; CHECK:       exit1:
-; CHECK-NEXT:    ret i32 1
-; CHECK:       exit2:
-; CHECK-NEXT:    ret i32 2
-; CHECK:       exit3:
-; CHECK-NEXT:    ret i32 3
 ; CHECK:       l1.1:
 ; CHECK-NEXT:    [[ADR_1:%.*]] = getelementptr i32, i32* [[BASE]], i32 1
 ; CHECK-NEXT:    [[VAL_1:%.*]] = load i32, i32* [[ADR_1]], align 4
@@ -222,6 +216,12 @@ define i32 @multiExitIncomplete(i32* %base) nounwind {
 ; CHECK-NEXT:    br i1 true, label [[L3_5:%.*]], label [[EXIT2:%.*]]
 ; CHECK:       l3.5:
 ; CHECK-NEXT:    br label [[EXIT3]]
+; CHECK:       exit1:
+; CHECK-NEXT:    ret i32 1
+; CHECK:       exit2:
+; CHECK-NEXT:    ret i32 2
+; CHECK:       exit3:
+; CHECK-NEXT:    ret i32 3
 ;
 entry:
   br label %l1
@@ -313,15 +313,15 @@ define void @nsw_latch(i32* %a) nounwind {
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    br i1 false, label [[RETURN:%.*]], label [[FOR_BODY_1:%.*]]
+; CHECK:       for.body.1:
+; CHECK-NEXT:    br i1 false, label [[FOR_COND_1:%.*]], label [[RETURN]]
+; CHECK:       for.cond.1:
+; CHECK-NEXT:    br label [[RETURN]]
 ; CHECK:       return:
-; CHECK-NEXT:    [[B_03_LCSSA:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 8, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1:%.*]] ]
+; CHECK-NEXT:    [[B_03_LCSSA:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 8, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1]] ]
 ; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1]] ]
 ; CHECK-NEXT:    store i32 [[B_03_LCSSA]], i32* [[A:%.*]], align 4
 ; CHECK-NEXT:    ret void
-; CHECK:       for.body.1:
-; CHECK-NEXT:    br i1 false, label [[FOR_COND_1]], label [[RETURN]]
-; CHECK:       for.cond.1:
-; CHECK-NEXT:    br label [[RETURN]]
 ;
 entry:
   br label %for.body

diff  --git a/llvm/test/Transforms/LoopUnroll/shifted-tripcount.ll b/llvm/test/Transforms/LoopUnroll/shifted-tripcount.ll
index 6270962e8fb01..93405bb47d536 100644
--- a/llvm/test/Transforms/LoopUnroll/shifted-tripcount.ll
+++ b/llvm/test/Transforms/LoopUnroll/shifted-tripcount.ll
@@ -75,10 +75,10 @@ define void @non_latch_exit(double* nocapture %p, i64 %n) nounwind {
 ; CHECK-NEXT:    store double [[MUL9_1]], double* [[ARRAYIDX7_1]], align 8
 ; CHECK-NEXT:    [[EXITCOND_1:%.*]] = icmp eq i64 [[TMP16_1]], [[MUL10]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_1]], label [[FOR_END:%.*]], label [[LATCH_1]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
 ; CHECK:       latch.1:
 ; CHECK-NEXT:    br label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %mul10 = shl i64 %n, 1

diff  --git a/llvm/test/Transforms/LoopUnroll/unroll-header-exiting-with-phis-multiple-exiting-blocks.ll b/llvm/test/Transforms/LoopUnroll/unroll-header-exiting-with-phis-multiple-exiting-blocks.ll
index c78cdebe43710..bb9db35c1b4cd 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-header-exiting-with-phis-multiple-exiting-blocks.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-header-exiting-with-phis-multiple-exiting-blocks.ll
@@ -22,12 +22,9 @@ define i16 @full_unroll_multiple_exiting_blocks(i16* %A, i16 %x, i16 %y) {
 ; CHECK-NEXT:    [[LV_1:%.*]] = load i16, i16* [[PTR_1]], align 2
 ; CHECK-NEXT:    [[RES_NEXT_1:%.*]] = add i16 [[RES_NEXT]], [[LV_1]]
 ; CHECK-NEXT:    br label [[EXITING_1_1:%.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[RES_LCSSA:%.*]] = phi i16 [ 0, [[EXITING_1]] ], [ 1, [[EXITING_2]] ], [ 0, [[EXITING_1_1]] ], [ 1, [[EXITING_2_1:%.*]] ], [ 0, [[EXITING_1_2:%.*]] ], [ 1, [[EXITING_2_2:%.*]] ], [ [[RES_NEXT_3:%.*]], [[LATCH_2:%.*]] ], [ 0, [[EXITING_1_3:%.*]] ], [ 1, [[EXITING_2_3:%.*]] ]
-; CHECK-NEXT:    ret i16 [[RES_LCSSA]]
 ; CHECK:       exiting.1.1:
 ; CHECK-NEXT:    [[EC_1_1:%.*]] = icmp eq i16 [[LV_1]], [[X]]
-; CHECK-NEXT:    br i1 [[EC_1_1]], label [[EXIT]], label [[EXITING_2_1]]
+; CHECK-NEXT:    br i1 [[EC_1_1]], label [[EXIT]], label [[EXITING_2_1:%.*]]
 ; CHECK:       exiting.2.1:
 ; CHECK-NEXT:    [[EC_2_1:%.*]] = icmp eq i16 [[LV_1]], [[Y]]
 ; CHECK-NEXT:    br i1 [[EC_2_1]], label [[EXIT]], label [[LATCH_1:%.*]]
@@ -35,26 +32,29 @@ define i16 @full_unroll_multiple_exiting_blocks(i16* %A, i16 %x, i16 %y) {
 ; CHECK-NEXT:    [[PTR_2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 2
 ; CHECK-NEXT:    [[LV_2:%.*]] = load i16, i16* [[PTR_2]], align 2
 ; CHECK-NEXT:    [[RES_NEXT_2:%.*]] = add i16 [[RES_NEXT_1]], [[LV_2]]
-; CHECK-NEXT:    br label [[EXITING_1_2]]
+; CHECK-NEXT:    br label [[EXITING_1_2:%.*]]
 ; CHECK:       exiting.1.2:
 ; CHECK-NEXT:    [[EC_1_2:%.*]] = icmp eq i16 [[LV_2]], [[X]]
-; CHECK-NEXT:    br i1 [[EC_1_2]], label [[EXIT]], label [[EXITING_2_2]]
+; CHECK-NEXT:    br i1 [[EC_1_2]], label [[EXIT]], label [[EXITING_2_2:%.*]]
 ; CHECK:       exiting.2.2:
 ; CHECK-NEXT:    [[EC_2_2:%.*]] = icmp eq i16 [[LV_2]], [[Y]]
-; CHECK-NEXT:    br i1 [[EC_2_2]], label [[EXIT]], label [[LATCH_2]]
+; CHECK-NEXT:    br i1 [[EC_2_2]], label [[EXIT]], label [[LATCH_2:%.*]]
 ; CHECK:       latch.2:
 ; CHECK-NEXT:    [[PTR_3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 3
 ; CHECK-NEXT:    [[LV_3:%.*]] = load i16, i16* [[PTR_3]], align 2
-; CHECK-NEXT:    [[RES_NEXT_3]] = add i16 [[RES_NEXT_2]], [[LV_3]]
-; CHECK-NEXT:    br i1 false, label [[EXITING_1_3]], label [[EXIT]]
+; CHECK-NEXT:    [[RES_NEXT_3:%.*]] = add i16 [[RES_NEXT_2]], [[LV_3]]
+; CHECK-NEXT:    br i1 false, label [[EXITING_1_3:%.*]], label [[EXIT]]
 ; CHECK:       exiting.1.3:
 ; CHECK-NEXT:    [[EC_1_3:%.*]] = icmp eq i16 [[LV_3]], [[X]]
-; CHECK-NEXT:    br i1 [[EC_1_3]], label [[EXIT]], label [[EXITING_2_3]]
+; CHECK-NEXT:    br i1 [[EC_1_3]], label [[EXIT]], label [[EXITING_2_3:%.*]]
 ; CHECK:       exiting.2.3:
 ; CHECK-NEXT:    [[EC_2_3:%.*]] = icmp eq i16 [[LV_3]], [[Y]]
 ; CHECK-NEXT:    br i1 [[EC_2_3]], label [[EXIT]], label [[LATCH_3:%.*]]
 ; CHECK:       latch.3:
 ; CHECK-NEXT:    unreachable
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES_LCSSA:%.*]] = phi i16 [ 0, [[EXITING_1]] ], [ 1, [[EXITING_2]] ], [ 0, [[EXITING_1_1]] ], [ 1, [[EXITING_2_1]] ], [ 0, [[EXITING_1_2]] ], [ 1, [[EXITING_2_2]] ], [ [[RES_NEXT_3]], [[LATCH_2]] ], [ 0, [[EXITING_1_3]] ], [ 1, [[EXITING_2_3]] ]
+; CHECK-NEXT:    ret i16 [[RES_LCSSA]]
 ;
 entry:
   br label %header

diff  --git a/llvm/test/Transforms/LoopUnroll/unroll-unconditional-latch.ll b/llvm/test/Transforms/LoopUnroll/unroll-unconditional-latch.ll
index 7d56166d0cd56..269969f1ebb43 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-unconditional-latch.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-unconditional-latch.ll
@@ -15,8 +15,6 @@ define void @test2(i32* %arg, i64* %out)  {
 ; CHECK-NEXT:    [[PTR_1:%.*]] = getelementptr inbounds i32, i32* [[ARG]], i64 1
 ; CHECK-NEXT:    store i32 0, i32* [[PTR_1]], align 4
 ; CHECK-NEXT:    br label [[FOR_LATCH_1:%.*]]
-; CHECK:       if.end.loopexit:
-; CHECK-NEXT:    ret void
 ; CHECK:       for.latch.1:
 ; CHECK-NEXT:    store volatile i64 1, i64* [[OUT]], align 4
 ; CHECK-NEXT:    [[PTR_2:%.*]] = getelementptr inbounds i32, i32* [[ARG]], i64 2
@@ -30,6 +28,8 @@ define void @test2(i32* %arg, i64* %out)  {
 ; CHECK:       for.latch.3:
 ; CHECK-NEXT:    store volatile i64 3, i64* [[OUT]], align 4
 ; CHECK-NEXT:    unreachable
+; CHECK:       if.end.loopexit:
+; CHECK-NEXT:    ret void
 ;
 
 entry:
@@ -63,11 +63,11 @@ define double @test_with_lcssa(double %arg1, double* %arg2) {
 ; CHECK-NEXT:    [[LV:%.*]] = load double, double* [[PTR]], align 8
 ; CHECK-NEXT:    [[RES_1:%.*]] = fsub double [[LV]], [[RES]]
 ; CHECK-NEXT:    br i1 true, label [[LOOP_EXIT:%.*]], label [[LOOP_LATCH_1:%.*]]
+; CHECK:       loop.latch.1:
+; CHECK-NEXT:    unreachable
 ; CHECK:       loop.exit:
 ; CHECK-NEXT:    [[RES_LCSSA:%.*]] = phi double [ [[RES_1]], [[LOOP_LATCH]] ]
 ; CHECK-NEXT:    ret double [[RES_LCSSA]]
-; CHECK:       loop.latch.1:
-; CHECK-NEXT:    unreachable
 ;
 
 entry:
@@ -110,8 +110,6 @@ define void @test_with_nested_loop(i32* %arg)  {
 ; CHECK-NEXT:    br i1 [[INNER_COND]], label [[OUTER_LATCH:%.*]], label [[INNER_BODY]]
 ; CHECK:       outer.latch:
 ; CHECK-NEXT:    br label [[INNER_BODY_PREHEADER_1:%.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
 ; CHECK:       inner.body.preheader.1:
 ; CHECK-NEXT:    br label [[INNER_BODY_1:%.*]]
 ; CHECK:       inner.body.1:
@@ -136,6 +134,8 @@ define void @test_with_nested_loop(i32* %arg)  {
 ; CHECK-NEXT:    br i1 [[INNER_COND_2]], label [[OUTER_LATCH_2:%.*]], label [[INNER_BODY_2]]
 ; CHECK:       outer.latch.2:
 ; CHECK-NEXT:    unreachable
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
 ;
 
 entry:

diff  --git a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
index 19b5015e782a7..bdd1f4d4b921d 100644
--- a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
+++ b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
@@ -102,12 +102,6 @@ define void @test1(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
 ; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]]
-; CHECK:       for.end.loopexit.epilog-lcssa:
-; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT]]
-; CHECK:       for.end.loopexit:
-; CHECK-NEXT:    br label [[FOR_END]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
 ; CHECK:       for.outer.epil.1:
 ; CHECK-NEXT:    br label [[FOR_INNER_EPIL_1:%.*]]
 ; CHECK:       for.inner.epil.1:
@@ -143,6 +137,12 @@ define void @test1(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_EPIL_1]]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL_2]], i32* [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK:       for.end.loopexit.epilog-lcssa:
+; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %cmp = icmp ne i32 %E, 0
@@ -287,12 +287,6 @@ define void @test2(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
 ; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END10_LOOPEXIT_EPILOG_LCSSA:%.*]]
-; CHECK:       for.end10.loopexit.epilog-lcssa:
-; CHECK-NEXT:    br label [[FOR_END10_LOOPEXIT]]
-; CHECK:       for.end10.loopexit:
-; CHECK-NEXT:    br label [[FOR_END10]]
-; CHECK:       for.end10:
-; CHECK-NEXT:    ret void
 ; CHECK:       for.outer.epil.1:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD9_EPIL]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]]
@@ -330,6 +324,12 @@ define void @test2(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], [[FOR_INNER_EPIL_2]] ]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL_2]], i32* [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    br label [[FOR_END10_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK:       for.end10.loopexit.epilog-lcssa:
+; CHECK-NEXT:    br label [[FOR_END10_LOOPEXIT]]
+; CHECK:       for.end10.loopexit:
+; CHECK-NEXT:    br label [[FOR_END10]]
+; CHECK:       for.end10:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %cmp = icmp ne i32 %E, 0
@@ -820,12 +820,6 @@ define void @test7(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
 ; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]]
-; CHECK:       for.end.loopexit.epilog-lcssa:
-; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT]]
-; CHECK:       for.end.loopexit:
-; CHECK-NEXT:    br label [[FOR_END]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
 ; CHECK:       for.outer.epil.1:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD_EPIL]]
 ; CHECK-NEXT:    store i32 0, i32* [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]]
@@ -868,6 +862,12 @@ define void @test7(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    [[ADD9_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD9_EPIL_2]], [[FOR_INNER_EPIL_2]] ]
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_EPIL_2]], i32* [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK:       for.end.loopexit.epilog-lcssa:
+; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %cmp = icmp ne i32 %E, 0
@@ -1023,16 +1023,6 @@ define void @test8(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
 ; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_CLEANUP_EPILOG_LCSSA:%.*]]
-; CHECK:       for.cleanup.epilog-lcssa:
-; CHECK-NEXT:    br label [[FOR_CLEANUP]]
-; CHECK:       for.cleanup:
-; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[X_038]], 1
-; CHECK-NEXT:    [[EXITCOND41:%.*]] = icmp eq i32 [[INC]], 5
-; CHECK-NEXT:    br i1 [[EXITCOND41]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_OUTEST]]
-; CHECK:       for.end.loopexit:
-; CHECK-NEXT:    br label [[FOR_END]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
 ; CHECK:       for.outer.epil.1:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD_EPIL]]
 ; CHECK-NEXT:    store i32 0, i32* [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]]
@@ -1075,6 +1065,16 @@ define void @test8(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    [[ADD9_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD9_EPIL_2]], [[FOR_INNER_EPIL_2]] ]
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_EPIL_2]], i32* [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    br label [[FOR_CLEANUP_EPILOG_LCSSA]]
+; CHECK:       for.cleanup.epilog-lcssa:
+; CHECK-NEXT:    br label [[FOR_CLEANUP]]
+; CHECK:       for.cleanup:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[X_038]], 1
+; CHECK-NEXT:    [[EXITCOND41:%.*]] = icmp eq i32 [[INC]], 5
+; CHECK-NEXT:    br i1 [[EXITCOND41]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_OUTEST]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %cmp = icmp eq i32 %E, 0
@@ -1226,12 +1226,6 @@ define void @test9(i32 %I, i32 %E, i32* nocapture %A, i16* nocapture readonly %B
 ; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
 ; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]]
-; CHECK:       for.end.loopexit.epilog-lcssa:
-; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT]]
-; CHECK:       for.end.loopexit:
-; CHECK-NEXT:    br label [[FOR_END]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
 ; CHECK:       for.outer.epil.1:
 ; CHECK-NEXT:    br label [[FOR_INNER_EPIL_1:%.*]]
 ; CHECK:       for.inner.epil.1:
@@ -1269,6 +1263,12 @@ define void @test9(i32 %I, i32 %E, i32* nocapture %A, i16* nocapture readonly %B
 ; CHECK-NEXT:    [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_EPIL_1]]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL_2]], i32* [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK:       for.end.loopexit.epilog-lcssa:
+; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %cmp = icmp ne i32 %E, 0

diff  --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
index 14f18654da4e8..33cd004d43482 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
@@ -120,8 +120,6 @@ define void @matrix_extract_insert_loop(i32 %i, [225 x double]* nonnull align 8
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP8]])
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP7]]
 ; CHECK-NEXT:    br label [[FOR_BODY4_US_1:%.*]]
-; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    ret void
 ; CHECK:       for.body4.us.1:
 ; CHECK-NEXT:    [[K_013_US_1:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ [[INC_US_1:%.*]], [[FOR_BODY4_US_1]] ]
 ; CHECK-NEXT:    [[NARROW:%.*]] = add nuw nsw i32 [[K_013_US_1]], 15
@@ -185,6 +183,8 @@ define void @matrix_extract_insert_loop(i32 %i, [225 x double]* nonnull align 8
 ; CHECK-NEXT:    [[INC_US_3]] = add nuw nsw i32 [[K_013_US_3]], 1
 ; CHECK-NEXT:    [[CMP2_US_3:%.*]] = icmp ult i32 [[INC_US_3]], [[I]]
 ; CHECK-NEXT:    br i1 [[CMP2_US_3]], label [[FOR_BODY4_US_3]], label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %i.addr = alloca i32, align 4


        


More information about the llvm-commits mailing list