[llvm] 37ead20 - [runtime-unroll] Use incrementing IVs instead of decrementing ones

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 12 16:08:13 PST 2021


Author: Philip Reames
Date: 2021-11-12T15:44:58-08:00
New Revision: 37ead201e6148f2b079179a71ccbad5af223ce03

URL: https://github.com/llvm/llvm-project/commit/37ead201e6148f2b079179a71ccbad5af223ce03
DIFF: https://github.com/llvm/llvm-project/commit/37ead201e6148f2b079179a71ccbad5af223ce03.diff

LOG: [runtime-unroll] Use incrementing IVs instead of decrementing ones

This is one of those wonderful "in theory X doesn't matter, but in practice is does" changes. In this particular case, we shift the IVs inserted by the runtime unroller to clamp iteration count of the loops* from decrementing to incrementing.

Why does this matter?  A couple of reasons:
* SCEV doesn't have a native subtract node.  Instead, all subtracts (A - B) are represented as A + -1 * B and drops any flags invalidated by such.  As a result, SCEV is slightly less good at reasoning about edge cases involving decrementing addrecs than incrementing ones.  (You can see this in the inferred flags in some of the test cases.)
* Other parts of the optimizer produce incrementing IVs, and they're common in idiomatic source language.  We do have support for reversing IVs, but in general if we produce one of each, the pair will persist surprisingly far through the optimizer before being coalesced.  (You can see this looking at nearby phis in the test cases.)

Note that if the hardware prefers decrementing (i.e. zero tested) loops, LSR should convert back immediately before codegen.

* Mostly irrelevant detail: The main loop of the prolog case is handled independently and will simple use the original IV with a changed start value.  We could in theory use this scheme for all iteration clamping, but that's a larger and more invasive change.

Added: 
    

Modified: 
    llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
    llvm/test/DebugInfo/unrolled-loop-remainder.ll
    llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
    llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll
    llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
    llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
    llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll
    llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll
    llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
    llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
    llvm/test/Transforms/LoopUnroll/runtime-loop.ll
    llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
    llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
    llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
    llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
    llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
    llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
    llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
    llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 22d6d5c948a0..0cde2bcfbca1 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -353,20 +353,22 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
     if (Latch == *BB) {
       // For the last block, create a loop back to cloned head.
       VMap.erase((*BB)->getTerminator());
+      // Use an incrementing IV.  Pre-incr/post-incr is backedge/trip count.
+      // Subtle: NewIter can be 0 if we wrapped when computing the trip count,
+      // thus we must compare the post-increment (wrapping) value.
       BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
       BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
       IRBuilder<> Builder(LatchBR);
       PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
                                         suffix + ".iter",
                                         FirstLoopBB->getFirstNonPHI());
-      Value *IdxSub =
-        Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
-                          NewIdx->getName() + ".sub");
-      Value *IdxCmp =
-        Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
+      auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
+      auto *One = ConstantInt::get(NewIdx->getType(), 1);
+      Value *IdxNext = Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
+      Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
       Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
-      NewIdx->addIncoming(NewIter, InsertTop);
-      NewIdx->addIncoming(IdxSub, NewBB);
+      NewIdx->addIncoming(Zero, InsertTop);
+      NewIdx->addIncoming(IdxNext, NewBB);
       LatchBR->eraseFromParent();
     }
   }
@@ -920,23 +922,22 @@ bool llvm::UnrollRuntimeLoopRemainder(
                   PreserveLCSSA);
 
     // Update counter in loop for unrolling.
-    // I should be multiply of Count.
+    // Use an incrementing IV.  Pre-incr/post-incr is backedge/trip count.
+    // Subtle: TestVal can be 0 if we wrapped when computing the trip count,
+    // thus we must compare the post-increment (wrapping) value.
     IRBuilder<> B2(NewPreHeader->getTerminator());
     Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
     BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
-    B2.SetInsertPoint(LatchBR);
     PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
                                       Header->getFirstNonPHI());
-    Value *IdxSub =
-        B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
-                     NewIdx->getName() + ".nsub");
-    Value *IdxCmp;
-    if (LatchBR->getSuccessor(0) == Header)
-      IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
-    else
-      IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
-    NewIdx->addIncoming(TestVal, NewPreHeader);
-    NewIdx->addIncoming(IdxSub, Latch);
+    B2.SetInsertPoint(LatchBR);
+    auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
+    auto *One = ConstantInt::get(NewIdx->getType(), 1);
+    Value *IdxNext = B2.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
+    auto Pred = LatchBR->getSuccessor(0) == Header ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
+    Value *IdxCmp = B2.CreateICmp(Pred, IdxNext, TestVal, NewIdx->getName() + ".ncmp");
+    NewIdx->addIncoming(Zero, NewPreHeader);
+    NewIdx->addIncoming(IdxNext, Latch);
     LatchBR->setCondition(IdxCmp);
   } else {
     // Connect the prolog code to the original loop and update the

diff  --git a/llvm/test/DebugInfo/unrolled-loop-remainder.ll b/llvm/test/DebugInfo/unrolled-loop-remainder.ll
index ba4ce1f409f6..a1dbce56ad52 100644
--- a/llvm/test/DebugInfo/unrolled-loop-remainder.ll
+++ b/llvm/test/DebugInfo/unrolled-loop-remainder.ll
@@ -35,8 +35,7 @@ define i32 @func_c() local_unnamed_addr #0 !dbg !14 {
 ; CHECK-NEXT:    [[CONV_PROL:%.*]] = sext i32 [[TMP5]] to i64, !dbg [[DBG28]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[CONV_PROL]] to i32*, !dbg [[DBG28]]
 ; CHECK-NEXT:    [[ADD_PROL:%.*]] = add nsw i32 [[DOTPR]], 2, !dbg [[DBG29:![0-9]+]]
-; CHECK-NEXT:    [[PROL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1, !dbg [[DBG24]]
-; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_SUB]], 0, !dbg [[DBG24]]
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]], !dbg [[DBG24]]
 ; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL_1:%.*]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !dbg [[DBG24]]
 ; CHECK:       for.body.prol.1:
 ; CHECK-NEXT:    [[ARRAYIDX_PROL_1:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 1, !dbg [[DBG28]]
@@ -44,8 +43,7 @@ define i32 @func_c() local_unnamed_addr #0 !dbg !14 {
 ; CHECK-NEXT:    [[CONV_PROL_1:%.*]] = sext i32 [[TMP7]] to i64, !dbg [[DBG28]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[CONV_PROL_1]] to i32*, !dbg [[DBG28]]
 ; CHECK-NEXT:    [[ADD_PROL_1:%.*]] = add nsw i32 [[ADD_PROL]], 2, !dbg [[DBG29]]
-; CHECK-NEXT:    [[PROL_ITER_SUB_1:%.*]] = sub i32 [[PROL_ITER_SUB]], 1, !dbg [[DBG24]]
-; CHECK-NEXT:    [[PROL_ITER_CMP_1:%.*]] = icmp ne i32 [[PROL_ITER_SUB_1]], 0, !dbg [[DBG24]]
+; CHECK-NEXT:    [[PROL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]], !dbg [[DBG24]]
 ; CHECK-NEXT:    br i1 [[PROL_ITER_CMP_1]], label [[FOR_BODY_PROL_2:%.*]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]], !dbg [[DBG24]]
 ; CHECK:       for.body.prol.2:
 ; CHECK-NEXT:    [[ARRAYIDX_PROL_2:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i64 1, !dbg [[DBG28]]

diff  --git a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
index 5c8f9ca01679..4f0c07343bda 100644
--- a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
@@ -20,7 +20,7 @@ define void @runtime_unroll_generic(i32 %arg_0, i32* %arg_1, i16* %arg_2, i16* %
 ; CHECK-A55-NEXT:    [[UNROLL_ITER:%.*]] = and i32 [[ARG_0]], -4
 ; CHECK-A55-NEXT:    br label [[FOR_BODY6:%.*]]
 ; CHECK-A55:       for.body6:
-; CHECK-A55-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_BODY6_PREHEADER_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_BODY6]] ]
+; CHECK-A55-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY6_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY6]] ]
 ; CHECK-A55-NEXT:    [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
 ; CHECK-A55-NEXT:    [[CONV:%.*]] = sext i16 [[TMP2]] to i32
 ; CHECK-A55-NEXT:    [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
@@ -50,8 +50,8 @@ define void @runtime_unroll_generic(i32 %arg_0, i32* %arg_1, i16* %arg_2, i16* %
 ; CHECK-A55-NEXT:    [[MUL16_3:%.*]] = mul nsw i32 [[CONV15_3]], [[CONV_3]]
 ; CHECK-A55-NEXT:    [[ADD21_3:%.*]] = add nsw i32 [[MUL16_3]], [[ADD21_2]]
 ; CHECK-A55-NEXT:    store i32 [[ADD21_3]], i32* [[ARRAYIDX20]], align 4
-; CHECK-A55-NEXT:    [[NITER_NSUB_3]] = add i32 [[NITER]], -4
-; CHECK-A55-NEXT:    [[NITER_NCMP_3_NOT:%.*]] = icmp eq i32 [[NITER_NSUB_3]], 0
+; CHECK-A55-NEXT:    [[NITER_NEXT_3]] = add i32 [[NITER]], 4
+; CHECK-A55-NEXT:    [[NITER_NCMP_3_NOT:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
 ; CHECK-A55-NEXT:    br i1 [[NITER_NCMP_3_NOT]], label [[FOR_END_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY6]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK-A55:       for.end.loopexit.unr-lcssa:
 ; CHECK-A55-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i32 [[XTRAITER]], 0

diff  --git a/llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll b/llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll
index 8c4257698ab7..9af0327f1a04 100644
--- a/llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll
+++ b/llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll
@@ -42,8 +42,7 @@ define void @test_three_blocks(i32* nocapture %Output,
 ; CHECK:       for.inc.epil:
 ; CHECK-NEXT:    [[TEMP_1_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], [[IF_THEN_EPIL]] ], [ [[TEMP_09_UNR]], [[FOR_BODY_EPIL]] ]
 ; CHECK-NEXT:    [[INC_EPIL:%.*]] = add nuw i32 [[J_010_UNR]], 1
-; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
+; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_BODY_EPIL_1:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA:%.*]]
 ; CHECK:       for.body.epil.1:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL]]
@@ -58,8 +57,7 @@ define void @test_three_blocks(i32* nocapture %Output,
 ; CHECK:       for.inc.epil.1:
 ; CHECK-NEXT:    [[TEMP_1_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], [[IF_THEN_EPIL_1]] ], [ [[TEMP_1_EPIL]], [[FOR_BODY_EPIL_1]] ]
 ; CHECK-NEXT:    [[INC_EPIL_1:%.*]] = add nuw i32 [[INC_EPIL]], 1
-; CHECK-NEXT:    [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0
+; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label [[FOR_BODY_EPIL_2:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
 ; CHECK:       for.body.epil.2:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL_1]]
@@ -87,7 +85,7 @@ define void @test_three_blocks(i32* nocapture %Output,
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[J_010:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_3]], [[FOR_INC_3]] ]
 ; CHECK-NEXT:    [[TEMP_09:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[TEMP_1_3]], [[FOR_INC_3]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_INC_3]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[J_010]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP8]], 0
@@ -100,7 +98,7 @@ define void @test_three_blocks(i32* nocapture %Output,
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[TEMP_1:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[TEMP_09]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i32 [[J_010]], 1
-; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1
+; CHECK-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i32 [[NITER]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
 ; CHECK-NEXT:    [[TOBOOL_1:%.*]] = icmp eq i32 [[TMP10]], 0
@@ -113,7 +111,7 @@ define void @test_three_blocks(i32* nocapture %Output,
 ; CHECK:       for.inc.1:
 ; CHECK-NEXT:    [[TEMP_1_1:%.*]] = phi i32 [ [[ADD_1]], [[IF_THEN_1]] ], [ [[TEMP_1]], [[FOR_INC]] ]
 ; CHECK-NEXT:    [[INC_1:%.*]] = add nuw nsw i32 [[INC]], 1
-; CHECK-NEXT:    [[NITER_NSUB_1:%.*]] = sub i32 [[NITER_NSUB]], 1
+; CHECK-NEXT:    [[NITER_NEXT_1:%.*]] = add nuw nsw i32 [[NITER_NEXT]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_1]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
 ; CHECK-NEXT:    [[TOBOOL_2:%.*]] = icmp eq i32 [[TMP12]], 0
@@ -126,7 +124,7 @@ define void @test_three_blocks(i32* nocapture %Output,
 ; CHECK:       for.inc.2:
 ; CHECK-NEXT:    [[TEMP_1_2:%.*]] = phi i32 [ [[ADD_2]], [[IF_THEN_2]] ], [ [[TEMP_1_1]], [[FOR_INC_1]] ]
 ; CHECK-NEXT:    [[INC_2:%.*]] = add nuw nsw i32 [[INC_1]], 1
-; CHECK-NEXT:    [[NITER_NSUB_2:%.*]] = sub i32 [[NITER_NSUB_1]], 1
+; CHECK-NEXT:    [[NITER_NEXT_2:%.*]] = add nuw nsw i32 [[NITER_NEXT_1]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_2]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
 ; CHECK-NEXT:    [[TOBOOL_3:%.*]] = icmp eq i32 [[TMP14]], 0
@@ -139,8 +137,8 @@ define void @test_three_blocks(i32* nocapture %Output,
 ; CHECK:       for.inc.3:
 ; CHECK-NEXT:    [[TEMP_1_3]] = phi i32 [ [[ADD_3]], [[IF_THEN_3]] ], [ [[TEMP_1_2]], [[FOR_INC_2]] ]
 ; CHECK-NEXT:    [[INC_3]] = add nuw i32 [[INC_2]], 1
-; CHECK-NEXT:    [[NITER_NSUB_3]] = sub i32 [[NITER_NSUB_2]], 1
-; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NSUB_3]], 0
+; CHECK-NEXT:    [[NITER_NEXT_3]] = add i32 [[NITER_NEXT_2]], 1
+; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]], label [[FOR_BODY]]
 ;
   i32* nocapture readonly %Condition,
@@ -414,8 +412,7 @@ define void @test_four_blocks(i32* nocapture %Output,
 ; CHECK:       for.inc.epil:
 ; CHECK-NEXT:    [[TEMP_1_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], [[IF_THEN_EPIL]] ], [ [[SUB13_EPIL]], [[IF_ELSE_EPIL]] ]
 ; CHECK-NEXT:    [[INC_EPIL:%.*]] = add nuw i32 [[J_027_UNR]], 1
-; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
+; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_BODY_EPIL_1:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA:%.*]]
 ; CHECK:       for.body.epil.1:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL]]
@@ -440,8 +437,7 @@ define void @test_four_blocks(i32* nocapture %Output,
 ; CHECK:       for.inc.epil.1:
 ; CHECK-NEXT:    [[TEMP_1_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], [[IF_THEN_EPIL_1]] ], [ [[SUB13_EPIL_1]], [[IF_ELSE_EPIL_1]] ]
 ; CHECK-NEXT:    [[INC_EPIL_1:%.*]] = add nuw i32 [[INC_EPIL]], 1
-; CHECK-NEXT:    [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0
+; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label [[FOR_BODY_EPIL_2:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
 ; CHECK:       for.body.epil.2:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL_1]]
@@ -480,7 +476,7 @@ define void @test_four_blocks(i32* nocapture %Output,
 ; CHECK-NEXT:    [[TMP12:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_BODY_LR_PH_NEW]] ], [ [[TMP23]], [[FOR_INC_3]] ]
 ; CHECK-NEXT:    [[J_027:%.*]] = phi i32 [ 1, [[FOR_BODY_LR_PH_NEW]] ], [ [[INC_3]], [[FOR_INC_3]] ]
 ; CHECK-NEXT:    [[TEMP_026:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[TEMP_1_3]], [[FOR_INC_3]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_INC_3]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[J_027]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[TMP13]], 65535
@@ -503,7 +499,7 @@ define void @test_four_blocks(i32* nocapture %Output,
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[TEMP_1:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[SUB13]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i32 [[J_027]], 1
-; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1
+; CHECK-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i32 [[NITER]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
 ; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp ugt i32 [[TMP16]], 65535
@@ -526,7 +522,7 @@ define void @test_four_blocks(i32* nocapture %Output,
 ; CHECK:       for.inc.1:
 ; CHECK-NEXT:    [[TEMP_1_1:%.*]] = phi i32 [ [[ADD_1]], [[IF_THEN_1]] ], [ [[SUB13_1]], [[IF_ELSE_1]] ]
 ; CHECK-NEXT:    [[INC_1:%.*]] = add nuw nsw i32 [[INC]], 1
-; CHECK-NEXT:    [[NITER_NSUB_1:%.*]] = sub i32 [[NITER_NSUB]], 1
+; CHECK-NEXT:    [[NITER_NEXT_1:%.*]] = add nuw nsw i32 [[NITER_NEXT]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
 ; CHECK-NEXT:    [[CMP1_2:%.*]] = icmp ugt i32 [[TMP19]], 65535
@@ -549,7 +545,7 @@ define void @test_four_blocks(i32* nocapture %Output,
 ; CHECK:       for.inc.2:
 ; CHECK-NEXT:    [[TEMP_1_2:%.*]] = phi i32 [ [[ADD_2]], [[IF_THEN_2]] ], [ [[SUB13_2]], [[IF_ELSE_2]] ]
 ; CHECK-NEXT:    [[INC_2:%.*]] = add nuw i32 [[INC_1]], 1
-; CHECK-NEXT:    [[NITER_NSUB_2:%.*]] = sub i32 [[NITER_NSUB_1]], 1
+; CHECK-NEXT:    [[NITER_NEXT_2:%.*]] = add nuw nsw i32 [[NITER_NEXT_1]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_2]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
 ; CHECK-NEXT:    [[CMP1_3:%.*]] = icmp ugt i32 [[TMP22]], 65535
@@ -572,8 +568,8 @@ define void @test_four_blocks(i32* nocapture %Output,
 ; CHECK:       for.inc.3:
 ; CHECK-NEXT:    [[TEMP_1_3]] = phi i32 [ [[ADD_3]], [[IF_THEN_3]] ], [ [[SUB13_3]], [[IF_ELSE_3]] ]
 ; CHECK-NEXT:    [[INC_3]] = add nuw nsw i32 [[INC_2]], 1
-; CHECK-NEXT:    [[NITER_NSUB_3]] = sub i32 [[NITER_NSUB_2]], 1
-; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NSUB_3]], 0
+; CHECK-NEXT:    [[NITER_NEXT_3]] = add i32 [[NITER_NEXT_2]], 1
+; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]], label [[FOR_BODY]]
 ;
   i32* nocapture readonly %Condition,

diff  --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
index 8c91a521c527..7c2da16b807a 100644
--- a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
@@ -31,7 +31,7 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND12:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT13_1:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[VECTOR_PH_NEW]] ], [ [[NITER_NSUB_1:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND12]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = and <16 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <16 x i32> [[TMP5]], zeroinitializer
@@ -41,7 +41,7 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK-NEXT:    store <16 x i8> [[TMP7]], <16 x i8>* [[TMP9]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i64 [[NITER]], 1
+; CHECK-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i64 [[NITER]], 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND_NEXT13]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = and <16 x i32> [[TMP10]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq <16 x i32> [[TMP11]], zeroinitializer
@@ -51,8 +51,8 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK-NEXT:    store <16 x i8> [[TMP13]], <16 x i8>* [[TMP15]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT_1]] = add i64 [[INDEX_NEXT]], 16
 ; CHECK-NEXT:    [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND_NEXT13]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    [[NITER_NSUB_1]] = sub i64 [[NITER_NSUB]], 1
-; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NSUB_1]], 0
+; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER_NEXT]], 1
+; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT:%.*]], label [[VECTOR_BODY]]
 ; CHECK:       middle.block.unr-lcssa.loopexit:
 ; CHECK-NEXT:    [[INDEX_UNR_PH:%.*]] = phi i64 [ [[INDEX_NEXT_1]], [[VECTOR_BODY]] ]
@@ -89,7 +89,7 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK-NEXT:    br label [[FOR_BODY_PROL:%.*]]
 ; CHECK:       for.body.prol:
 ; CHECK-NEXT:    [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ]
-; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER1]], [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[FOR_BODY_PROL]] ]
 ; CHECK-NEXT:    [[TMP25:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32
 ; CHECK-NEXT:    [[SHL_PROL:%.*]] = shl i32 1, [[TMP25]]
 ; CHECK-NEXT:    [[AND_PROL:%.*]] = and i32 [[SHL_PROL]], [[X]]
@@ -99,8 +99,8 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK-NEXT:    store i8 [[CONV_PROL]], i8* [[ARRAYIDX_PROL]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
 ; CHECK-NEXT:    [[EXITCOND_PROL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_PROL]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT:    [[PROL_ITER_SUB]] = sub i64 [[PROL_ITER]], 1
-; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_SUB]], 0
+; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER1]]
 ; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       for.body.prol.loopexit.unr-lcssa:
 ; CHECK-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ]

diff  --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
index 20f6d91b0de3..dc5e12d09044 100644
--- a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
@@ -31,7 +31,7 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND12:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT13_1:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[VECTOR_PH_NEW]] ], [ [[NITER_NSUB_1:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND12]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = and <16 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <16 x i32> [[TMP5]], zeroinitializer
@@ -41,7 +41,7 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK-NEXT:    store <16 x i8> [[TMP7]], <16 x i8>* [[TMP9]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i64 [[NITER]], 1
+; CHECK-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i64 [[NITER]], 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND_NEXT13]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = and <16 x i32> [[TMP10]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq <16 x i32> [[TMP11]], zeroinitializer
@@ -51,8 +51,8 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK-NEXT:    store <16 x i8> [[TMP13]], <16 x i8>* [[TMP15]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT_1]] = add i64 [[INDEX_NEXT]], 16
 ; CHECK-NEXT:    [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND_NEXT13]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    [[NITER_NSUB_1]] = sub i64 [[NITER_NSUB]], 1
-; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NSUB_1]], 0
+; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER_NEXT]], 1
+; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT:%.*]], label [[VECTOR_BODY]]
 ; CHECK:       middle.block.unr-lcssa.loopexit:
 ; CHECK-NEXT:    [[INDEX_UNR_PH:%.*]] = phi i64 [ [[INDEX_NEXT_1]], [[VECTOR_BODY]] ]
@@ -89,7 +89,7 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK-NEXT:    br label [[FOR_BODY_PROL:%.*]]
 ; CHECK:       for.body.prol:
 ; CHECK-NEXT:    [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ]
-; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER1]], [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[FOR_BODY_PROL]] ]
 ; CHECK-NEXT:    [[TMP25:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32
 ; CHECK-NEXT:    [[SHL_PROL:%.*]] = shl i32 1, [[TMP25]]
 ; CHECK-NEXT:    [[AND_PROL:%.*]] = and i32 [[SHL_PROL]], [[X]]
@@ -99,8 +99,8 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK-NEXT:    store i8 [[CONV_PROL]], i8* [[ARRAYIDX_PROL]], align 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
 ; CHECK-NEXT:    [[EXITCOND_PROL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_PROL]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT:    [[PROL_ITER_SUB]] = sub i64 [[PROL_ITER]], 1
-; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_SUB]], 0
+; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER1]]
 ; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       for.body.prol.loopexit.unr-lcssa:
 ; CHECK-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ]

diff  --git a/llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll b/llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll
index e36f0693d015..2ef342b80b92 100644
--- a/llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll
+++ b/llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll
@@ -145,7 +145,7 @@ define hidden void @runtime(i32* nocapture %a, i32* nocapture readonly %b, i32*
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_09:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_1]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NSUB_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[I_09]]
 ; CHECK-NEXT:    [[I:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 [[I_09]]
@@ -162,8 +162,8 @@ define hidden void @runtime(i32* nocapture %a, i32* nocapture readonly %b, i32*
 ; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INC]]
 ; CHECK-NEXT:    store i32 [[MUL_1]], i32* [[ARRAYIDX2_1]], align 4
 ; CHECK-NEXT:    [[INC_1]] = add nuw i32 [[I_09]], 2
-; CHECK-NEXT:    [[NITER_NSUB_1]] = add i32 [[NITER]], -2
-; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i32 [[NITER_NSUB_1]], 0
+; CHECK-NEXT:    [[NITER_NEXT_1]] = add i32 [[NITER]], 2
+; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i32 [[NITER_NEXT_1]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll
index b9e81c4adefa..a2e3693aadd8 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll
@@ -15,7 +15,7 @@ define i32 @test(i32* nocapture %a, i64 %n) {
 ; ENABLED:       header:
 ; ENABLED-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY_7:%.*]] ]
 ; ENABLED-NEXT:    [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[FOR_BODY_7]] ]
-; ENABLED-NEXT:    [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[ENTRY_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[FOR_BODY_7]] ]
+; ENABLED-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY_7]] ]
 ; ENABLED-NEXT:    [[CMP:%.*]] = icmp eq i64 [[N]], 42
 ; ENABLED-NEXT:    br i1 [[CMP]], label [[FOR_EXIT2_LOOPEXIT:%.*]], label [[FOR_BODY:%.*]]
 ; ENABLED:       for.body:
@@ -23,7 +23,7 @@ define i32 @test(i32* nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
 ; ENABLED-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[SUM_02]]
 ; ENABLED-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; ENABLED-NEXT:    [[NITER_NSUB:%.*]] = sub i64 [[NITER]], 1
+; ENABLED-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i64 [[NITER]], 1
 ; ENABLED-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[N]], 42
 ; ENABLED-NEXT:    br i1 [[CMP_1]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_1:%.*]]
 ; ENABLED:       for.body.1:
@@ -31,7 +31,7 @@ define i32 @test(i32* nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
 ; ENABLED-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[TMP3]], [[ADD]]
 ; ENABLED-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
-; ENABLED-NEXT:    [[NITER_NSUB_1:%.*]] = sub i64 [[NITER_NSUB]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_1:%.*]] = add nuw nsw i64 [[NITER_NEXT]], 1
 ; ENABLED-NEXT:    [[CMP_2:%.*]] = icmp eq i64 [[N]], 42
 ; ENABLED-NEXT:    br i1 [[CMP_2]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_2:%.*]]
 ; ENABLED:       for.body.2:
@@ -39,7 +39,7 @@ define i32 @test(i32* nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
 ; ENABLED-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[TMP4]], [[ADD_1]]
 ; ENABLED-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
-; ENABLED-NEXT:    [[NITER_NSUB_2:%.*]] = sub i64 [[NITER_NSUB_1]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_2:%.*]] = add nuw nsw i64 [[NITER_NEXT_1]], 1
 ; ENABLED-NEXT:    [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
 ; ENABLED-NEXT:    br i1 [[CMP_3]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_3:%.*]]
 ; ENABLED:       for.body.3:
@@ -47,7 +47,7 @@ define i32 @test(i32* nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
 ; ENABLED-NEXT:    [[ADD_3:%.*]] = add nsw i32 [[TMP5]], [[ADD_2]]
 ; ENABLED-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
-; ENABLED-NEXT:    [[NITER_NSUB_3:%.*]] = sub i64 [[NITER_NSUB_2]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_3:%.*]] = add nuw nsw i64 [[NITER_NEXT_2]], 1
 ; ENABLED-NEXT:    [[CMP_4:%.*]] = icmp eq i64 [[N]], 42
 ; ENABLED-NEXT:    br i1 [[CMP_4]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_4:%.*]]
 ; ENABLED:       for.body.4:
@@ -55,7 +55,7 @@ define i32 @test(i32* nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
 ; ENABLED-NEXT:    [[ADD_4:%.*]] = add nsw i32 [[TMP6]], [[ADD_3]]
 ; ENABLED-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
-; ENABLED-NEXT:    [[NITER_NSUB_4:%.*]] = sub i64 [[NITER_NSUB_3]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_4:%.*]] = add nuw nsw i64 [[NITER_NEXT_3]], 1
 ; ENABLED-NEXT:    [[CMP_5:%.*]] = icmp eq i64 [[N]], 42
 ; ENABLED-NEXT:    br i1 [[CMP_5]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_5:%.*]]
 ; ENABLED:       for.body.5:
@@ -63,7 +63,7 @@ define i32 @test(i32* nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
 ; ENABLED-NEXT:    [[ADD_5:%.*]] = add nsw i32 [[TMP7]], [[ADD_4]]
 ; ENABLED-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
-; ENABLED-NEXT:    [[NITER_NSUB_5:%.*]] = sub i64 [[NITER_NSUB_4]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_5:%.*]] = add nuw nsw i64 [[NITER_NEXT_4]], 1
 ; ENABLED-NEXT:    [[CMP_6:%.*]] = icmp eq i64 [[N]], 42
 ; ENABLED-NEXT:    br i1 [[CMP_6]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_6:%.*]]
 ; ENABLED:       for.body.6:
@@ -71,7 +71,7 @@ define i32 @test(i32* nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
 ; ENABLED-NEXT:    [[ADD_6:%.*]] = add nsw i32 [[TMP8]], [[ADD_5]]
 ; ENABLED-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
-; ENABLED-NEXT:    [[NITER_NSUB_6:%.*]] = sub i64 [[NITER_NSUB_5]], 1
+; ENABLED-NEXT:    [[NITER_NEXT_6:%.*]] = add nuw nsw i64 [[NITER_NEXT_5]], 1
 ; ENABLED-NEXT:    [[CMP_7:%.*]] = icmp eq i64 [[N]], 42
 ; ENABLED-NEXT:    br i1 [[CMP_7]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_7]]
 ; ENABLED:       for.body.7:
@@ -79,8 +79,8 @@ define i32 @test(i32* nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
 ; ENABLED-NEXT:    [[ADD_7]] = add nsw i32 [[TMP9]], [[ADD_6]]
 ; ENABLED-NEXT:    [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV_NEXT_6]], 1
-; ENABLED-NEXT:    [[NITER_NSUB_7]] = sub i64 [[NITER_NSUB_6]], 1
-; ENABLED-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0
+; ENABLED-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER_NEXT_6]], 1
+; ENABLED-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
 ; ENABLED-NEXT:    br i1 [[NITER_NCMP_7]], label [[FOR_END_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
 ; ENABLED:       for.end.unr-lcssa.loopexit:
 ; ENABLED-NEXT:    [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[FOR_BODY_7]] ]
@@ -98,7 +98,7 @@ define i32 @test(i32* nocapture %a, i64 %n) {
 ; ENABLED:       header.epil:
 ; ENABLED-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
 ; ENABLED-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
-; ENABLED-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_SUB:%.*]], [[FOR_BODY_EPIL]] ]
+; ENABLED-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ 0, [[HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[FOR_BODY_EPIL]] ]
 ; ENABLED-NEXT:    [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42
 ; ENABLED-NEXT:    br i1 [[CMP_EPIL]], label [[FOR_EXIT2_LOOPEXIT2:%.*]], label [[FOR_BODY_EPIL]]
 ; ENABLED:       for.body.epil:
@@ -107,8 +107,8 @@ define i32 @test(i32* nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[ADD_EPIL]] = add nsw i32 [[TMP10]], [[SUM_02_EPIL]]
 ; ENABLED-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
 ; ENABLED-NEXT:    [[EXITCOND_EPIL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N]]
-; ENABLED-NEXT:    [[EPIL_ITER_SUB]] = sub i64 [[EPIL_ITER]], 1
-; ENABLED-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_SUB]], 0
+; ENABLED-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; ENABLED-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
 ; ENABLED-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[HEADER_EPIL]], label [[FOR_END_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; ENABLED:       for.end.epilog-lcssa:
 ; ENABLED-NEXT:    [[SUM_0_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ]

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
index 472c550a2278..f8dab0fa2c3c 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
@@ -258,7 +258,7 @@ define void @test4(i16 %c3) {
 ; CHECK-NEXT:    br label [[HEADER_PROL:%.*]]
 ; CHECK:       header.prol:
 ; CHECK-NEXT:    [[INDVARS_IV_PROL:%.*]] = phi i64 [ 0, [[HEADER_PROL_PREHEADER]] ], [ [[INDVARS_IV_NEXT_PROL:%.*]], [[LATCH_PROL:%.*]] ]
-; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[HEADER_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[LATCH_PROL]] ]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, [[HEADER_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[LATCH_PROL]] ]
 ; CHECK-NEXT:    br label [[EXITING_PROL:%.*]]
 ; CHECK:       exiting.prol:
 ; CHECK-NEXT:    switch i16 [[C3:%.*]], label [[DEFAULT_LOOPEXIT_LOOPEXIT1:%.*]] [
@@ -268,8 +268,8 @@ define void @test4(i16 %c3) {
 ; CHECK:       latch.prol:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
 ; CHECK-NEXT:    [[C2_PROL:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT_PROL]], [[C1]]
-; CHECK-NEXT:    [[PROL_ITER_SUB]] = sub i64 [[PROL_ITER]], 1
-; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_SUB]], 0
+; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       header.prol.loopexit.unr-lcssa:
 ; CHECK-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[LATCH_PROL]] ]
@@ -377,7 +377,7 @@ define void @test5(i1 %c) {
 ; CHECK-NEXT:    br label [[OUTERH_PROL:%.*]]
 ; CHECK:       outerH.prol:
 ; CHECK-NEXT:    [[TMP4_PROL:%.*]] = phi i32 [ [[TMP6_PROL:%.*]], [[OUTERLATCH_PROL:%.*]] ], [ undef, [[OUTERH_PROL_PREHEADER]] ]
-; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i32 [ 0, [[OUTERH_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[OUTERLATCH_PROL]] ]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i32 [ 0, [[OUTERH_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[OUTERLATCH_PROL]] ]
 ; CHECK-NEXT:    br label [[INNERH_PROL:%.*]]
 ; CHECK:       innerH.prol:
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[INNEREXITING_PROL:%.*]], label [[OTHEREXITB_LOOPEXIT1:%.*]]
@@ -406,8 +406,8 @@ define void @test5(i1 %c) {
 ; CHECK:       outerLatch.prol:
 ; CHECK-NEXT:    [[TMP6_PROL]] = add i32 [[TMP4_PROL]], 1
 ; CHECK-NEXT:    [[TMP7_PROL:%.*]] = icmp sgt i32 [[TMP6_PROL]], 79
-; CHECK-NEXT:    [[PROL_ITER_SUB]] = sub i32 [[PROL_ITER]], 1
-; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_SUB]], 0
+; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i32 [[PROL_ITER]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_NEXT]], 0
 ; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[OUTERH_PROL]], label [[OUTERH_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK:       outerH.prol.loopexit.unr-lcssa:
 ; CHECK-NEXT:    [[TMP4_UNR_PH:%.*]] = phi i32 [ [[TMP6_PROL]], [[OUTERLATCH_PROL]] ]
@@ -620,13 +620,13 @@ define void @test6(i1 %c) {
 ; CHECK-NEXT:    br label [[HEADER_PROL:%.*]]
 ; CHECK:       header.prol:
 ; CHECK-NEXT:    [[INDVARS_IV_PROL:%.*]] = phi i64 [ undef, [[HEADER_PROL_PREHEADER]] ], [ [[INDVARS_IV_NEXT_PROL:%.*]], [[LATCH_PROL:%.*]] ]
-; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[HEADER_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[LATCH_PROL]] ]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, [[HEADER_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[LATCH_PROL]] ]
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[LATCH_PROL]], label [[OTHEREXIT_LOOPEXIT1:%.*]]
 ; CHECK:       latch.prol:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_PROL]] = add nsw i64 [[INDVARS_IV_PROL]], 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_PROL]], 616
-; CHECK-NEXT:    [[PROL_ITER_SUB]] = sub i64 [[PROL_ITER]], 1
-; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_SUB]], 0
+; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       header.prol.loopexit.unr-lcssa:
 ; CHECK-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[LATCH_PROL]] ]

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
index 1b6dd4cf2838..bd8265ca9936 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
@@ -20,7 +20,7 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br label %loop_header
 ; EPILOG:       loop_header:
 ; EPILOG-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch, label %loop_exiting_bb1
 ; EPILOG:       loop_exiting_bb1:
 ; EPILOG-NEXT:    br i1 false, label %loop_exiting_bb2, label %exit1.loopexit
@@ -34,7 +34,7 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    ret void
 ; EPILOG:       loop_latch:
 ; EPILOG-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.1, label %loop_exiting_bb1.1
 ; EPILOG:       loop_exiting_bb1.1:
 ; EPILOG-NEXT:    br i1 false, label %loop_exiting_bb2.1, label %exit1.loopexit
@@ -42,7 +42,7 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br i1 false, label %loop_latch.1, label %exit3.loopexit
 ; EPILOG:       loop_latch.1:
 ; EPILOG-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i64 %niter.next, 1
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.2, label %loop_exiting_bb1.2
 ; EPILOG:       loop_exiting_bb1.2:
 ; EPILOG-NEXT:    br i1 false, label %loop_exiting_bb2.2, label %exit1.loopexit
@@ -50,7 +50,7 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br i1 false, label %loop_latch.2, label %exit3.loopexit
 ; EPILOG:       loop_latch.2:
 ; EPILOG-NEXT:    %iv_next.2 = add nuw nsw i64 %iv_next.1, 1
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i64 %niter.next.1, 1
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.3, label %loop_exiting_bb1.3
 ; EPILOG:       loop_exiting_bb1.3:
 ; EPILOG-NEXT:    br i1 false, label %loop_exiting_bb2.3, label %exit1.loopexit
@@ -58,7 +58,7 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br i1 false, label %loop_latch.3, label %exit3.loopexit
 ; EPILOG:       loop_latch.3:
 ; EPILOG-NEXT:    %iv_next.3 = add nuw nsw i64 %iv_next.2, 1
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i64 %niter.next.2, 1
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.4, label %loop_exiting_bb1.4
 ; EPILOG:       loop_exiting_bb1.4:
 ; EPILOG-NEXT:    br i1 false, label %loop_exiting_bb2.4, label %exit1.loopexit
@@ -66,7 +66,7 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br i1 false, label %loop_latch.4, label %exit3.loopexit
 ; EPILOG:       loop_latch.4:
 ; EPILOG-NEXT:    %iv_next.4 = add nuw nsw i64 %iv_next.3, 1
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i64 %niter.next.3, 1
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.5, label %loop_exiting_bb1.5
 ; EPILOG:       loop_exiting_bb1.5:
 ; EPILOG-NEXT:    br i1 false, label %loop_exiting_bb2.5, label %exit1.loopexit
@@ -74,7 +74,7 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br i1 false, label %loop_latch.5, label %exit3.loopexit
 ; EPILOG:       loop_latch.5:
 ; EPILOG-NEXT:    %iv_next.5 = add nuw nsw i64 %iv_next.4, 1
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i64 %niter.next.4, 1
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.6, label %loop_exiting_bb1.6
 ; EPILOG:       loop_exiting_bb1.6:
 ; EPILOG-NEXT:    br i1 false, label %loop_exiting_bb2.6, label %exit1.loopexit
@@ -82,7 +82,7 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br i1 false, label %loop_latch.6, label %exit3.loopexit
 ; EPILOG:       loop_latch.6:
 ; EPILOG-NEXT:    %iv_next.6 = add nuw nsw i64 %iv_next.5, 1
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i64 %niter.next.5, 1
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.7, label %loop_exiting_bb1.7
 ; EPILOG:       loop_exiting_bb1.7:
 ; EPILOG-NEXT:    br i1 false, label %loop_exiting_bb2.7, label %exit1.loopexit
@@ -90,8 +90,8 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br i1 false, label %loop_latch.7, label %exit3.loopexit
 ; EPILOG:       loop_latch.7:
 ; EPILOG-NEXT:    %iv_next.7 = add i64 %iv_next.6, 1
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i64 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit2.loopexit.unr-lcssa.loopexit
 ; EPILOG:       exit1.loopexit:
 ; EPILOG-NEXT:    br label %exit1
@@ -110,7 +110,7 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br label %loop_header.epil
 ; EPILOG:       loop_header.epil:
 ; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %loop_header.epil.preheader ], [ %epil.iter.next, %loop_latch.epil ]
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %loop_exiting_bb1.epil
 ; EPILOG:       loop_exiting_bb1.epil:
 ; EPILOG-NEXT:    br i1 false, label %loop_exiting_bb2.epil, label %exit1.loopexit1
@@ -119,8 +119,8 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG:       loop_latch.epil:
 ; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
 ; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit2.loopexit.epilog-lcssa, !llvm.loop !0
 ; EPILOG:       exit2.loopexit.epilog-lcssa:
 ; EPILOG-NEXT:    br label %exit2.loopexit
@@ -138,7 +138,7 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br label %loop_header
 ; EPILOG-BLOCK:       loop_header:
 ; EPILOG-BLOCK-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.1, %loop_latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch, label %loop_exiting_bb1
 ; EPILOG-BLOCK:       loop_exiting_bb1:
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %loop_exiting_bb2, label %exit1.loopexit
@@ -150,7 +150,7 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    ret void
 ; EPILOG-BLOCK:       loop_latch:
 ; EPILOG-BLOCK-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.1, label %loop_exiting_bb1.1
 ; EPILOG-BLOCK:       loop_exiting_bb1.1:
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %loop_exiting_bb2.1, label %exit1.loopexit
@@ -158,8 +158,8 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %loop_latch.1, label %exit3.loopexit
 ; EPILOG-BLOCK:       loop_latch.1:
 ; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.loopexit.unr-lcssa.loopexit, !llvm.loop !0
 ; EPILOG-BLOCK:       exit1.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit1
@@ -193,7 +193,7 @@ define void @test1(i64 %trip, i1 %cond) {
 ; PROLOG-NEXT:    br label %loop_header.prol
 ; PROLOG:       loop_header.prol:
 ; PROLOG-NEXT:    %iv.prol = phi i64 [ 0, %loop_header.prol.preheader ], [ %iv_next.prol, %loop_latch.prol ]
-; PROLOG-NEXT:    %prol.iter = phi i64 [ %xtraiter, %loop_header.prol.preheader ], [ %prol.iter.sub, %loop_latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ 0, %loop_header.prol.preheader ], [ %prol.iter.next, %loop_latch.prol ]
 ; PROLOG-NEXT:    br i1 %cond, label %loop_latch.prol, label %loop_exiting_bb1.prol
 ; PROLOG:       loop_exiting_bb1.prol:
 ; PROLOG-NEXT:    br i1 false, label %loop_exiting_bb2.prol, label %exit1.loopexit1
@@ -202,8 +202,8 @@ define void @test1(i64 %trip, i1 %cond) {
 ; PROLOG:       loop_latch.prol:
 ; PROLOG-NEXT:    %iv_next.prol = add i64 %iv.prol, 1
 ; PROLOG-NEXT:    %cmp.prol = icmp ne i64 %iv_next.prol, %trip
-; PROLOG-NEXT:    %prol.iter.sub = sub i64 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i64 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %loop_header.prol, label %loop_header.prol.loopexit.unr-lcssa, !llvm.loop !0
 ; PROLOG:       loop_header.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.prol, %loop_latch.prol ]
@@ -392,7 +392,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG:       header:
 ; EPILOG-NEXT:    %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.7, %for.body.7 ]
 ; EPILOG-NEXT:    %sum.02 = phi i32 [ 0, %entry.new ], [ %add.7, %for.body.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %for.body.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %for.body.7 ]
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block
 ; EPILOG:       for.exiting_block:
 ; EPILOG-NEXT:    %cmp = icmp eq i64 %n, 42
@@ -402,7 +402,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    %2 = load i32, i32* %arrayidx, align 4
 ; EPILOG-NEXT:    %add = add nsw i32 %2, %sum.02
 ; EPILOG-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1
 ; EPILOG:       for.exiting_block.1:
 ; EPILOG-NEXT:    %cmp.1 = icmp eq i64 %n, 42
@@ -412,7 +412,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
 ; EPILOG-NEXT:    %add.1 = add nsw i32 %3, %add
 ; EPILOG-NEXT:    %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i64 %niter.next, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.2
 ; EPILOG:       for.exiting_block.2:
 ; EPILOG-NEXT:    %cmp.2 = icmp eq i64 %n, 42
@@ -422,7 +422,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    %4 = load i32, i32* %arrayidx.2, align 4
 ; EPILOG-NEXT:    %add.2 = add nsw i32 %4, %add.1
 ; EPILOG-NEXT:    %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i64 %niter.next.1, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.3
 ; EPILOG:       for.exiting_block.3:
 ; EPILOG-NEXT:    %cmp.3 = icmp eq i64 %n, 42
@@ -432,7 +432,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    %5 = load i32, i32* %arrayidx.3, align 4
 ; EPILOG-NEXT:    %add.3 = add nsw i32 %5, %add.2
 ; EPILOG-NEXT:    %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i64 %niter.next.2, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.4
 ; EPILOG:       for.exiting_block.4:
 ; EPILOG-NEXT:    %cmp.4 = icmp eq i64 %n, 42
@@ -442,7 +442,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    %6 = load i32, i32* %arrayidx.4, align 4
 ; EPILOG-NEXT:    %add.4 = add nsw i32 %6, %add.3
 ; EPILOG-NEXT:    %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i64 %niter.next.3, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.5
 ; EPILOG:       for.exiting_block.5:
 ; EPILOG-NEXT:    %cmp.5 = icmp eq i64 %n, 42
@@ -452,7 +452,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    %7 = load i32, i32* %arrayidx.5, align 4
 ; EPILOG-NEXT:    %add.5 = add nsw i32 %7, %add.4
 ; EPILOG-NEXT:    %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i64 %niter.next.4, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.6
 ; EPILOG:       for.exiting_block.6:
 ; EPILOG-NEXT:    %cmp.6 = icmp eq i64 %n, 42
@@ -462,7 +462,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    %8 = load i32, i32* %arrayidx.6, align 4
 ; EPILOG-NEXT:    %add.6 = add nsw i32 %8, %add.5
 ; EPILOG-NEXT:    %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i64 %niter.next.5, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.7
 ; EPILOG:       for.exiting_block.7:
 ; EPILOG-NEXT:    %cmp.7 = icmp eq i64 %n, 42
@@ -472,8 +472,8 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    %9 = load i32, i32* %arrayidx.7, align 4
 ; EPILOG-NEXT:    %add.7 = add nsw i32 %9, %add.6
 ; EPILOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv.next.6, 1
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i64 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %for.end.unr-lcssa.loopexit, label %header
 ; EPILOG:       for.end.unr-lcssa.loopexit:
 ; EPILOG-NEXT:    %sum.0.lcssa.ph.ph = phi i32 [ %add.7, %for.body.7 ]
@@ -491,7 +491,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG:       header.epil:
 ; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
 ; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %for.body.epil ], [ %sum.02.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %for.body.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %header.epil.preheader ], [ %epil.iter.next, %for.body.epil ]
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit2, label %for.exiting_block.epil
 ; EPILOG:       for.exiting_block.epil:
 ; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
@@ -502,8 +502,8 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    %add.epil = add nsw i32 %10, %sum.02.epil
 ; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
 ; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %for.end.epilog-lcssa, !llvm.loop !2
 ; EPILOG:       for.end.epilog-lcssa:
 ; EPILOG-NEXT:    %sum.0.lcssa.ph1 = phi i32 [ %add.epil, %for.body.epil ]
@@ -533,7 +533,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-BLOCK:       header:
 ; EPILOG-BLOCK-NEXT:    %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.1, %for.body.1 ]
 ; EPILOG-BLOCK-NEXT:    %sum.02 = phi i32 [ 0, %entry.new ], [ %add.1, %for.body.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.1, %for.body.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %for.body.1 ]
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block
 ; EPILOG-BLOCK:       for.exiting_block:
 ; EPILOG-BLOCK-NEXT:    %cmp = icmp eq i64 %n, 42
@@ -543,7 +543,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-BLOCK-NEXT:    %2 = load i32, i32* %arrayidx, align 4
 ; EPILOG-BLOCK-NEXT:    %add = add nsw i32 %2, %sum.02
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1
 ; EPILOG-BLOCK:       for.exiting_block.1:
 ; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
@@ -553,8 +553,8 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
 ; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %3, %add
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %for.end.unr-lcssa.loopexit, label %header, !llvm.loop !2
 ; EPILOG-BLOCK:       for.end.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %sum.0.lcssa.ph.ph = phi i32 [ %add.1, %for.body.1 ]
@@ -600,7 +600,7 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; PROLOG:       header.prol:
 ; PROLOG-NEXT:    %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %header.prol.preheader ]
 ; PROLOG-NEXT:    %sum.02.prol = phi i32 [ %add.prol, %for.body.prol ], [ 0, %header.prol.preheader ]
-; PROLOG-NEXT:    %prol.iter = phi i64 [ %xtraiter, %header.prol.preheader ], [ %prol.iter.sub, %for.body.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ 0, %header.prol.preheader ], [ %prol.iter.next, %for.body.prol ]
 ; PROLOG-NEXT:    br i1 false, label %for.exit2.loopexit1, label %for.exiting_block.prol
 ; PROLOG:       for.exiting_block.prol:
 ; PROLOG-NEXT:    %cmp.prol = icmp eq i64 %n, 42
@@ -611,8 +611,8 @@ define i32 @test2(i32* nocapture %a, i64 %n) {
 ; PROLOG-NEXT:    %add.prol = add nsw i32 %1, %sum.02.prol
 ; PROLOG-NEXT:    %indvars.iv.next.prol = add i64 %indvars.iv.prol, 1
 ; PROLOG-NEXT:    %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
-; PROLOG-NEXT:    %prol.iter.sub = sub i64 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i64 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %header.prol, label %header.prol.loopexit.unr-lcssa, !llvm.loop !2
 ; PROLOG:       header.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %sum.0.lcssa.unr.ph = phi i32 [ %add.prol, %for.body.prol ]
@@ -826,7 +826,7 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG:       loop_header:
 ; EPILOG-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    %sum = phi i64 [ 0, %entry.new ], [ %sum.next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    br i1 undef, label %loop_latch, label %loop_exiting_bb1
 ; EPILOG:       loop_exiting_bb1:
 ; EPILOG-NEXT:    switch i64 %sum, label %loop_latch [
@@ -842,7 +842,7 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG:       loop_latch:
 ; EPILOG-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
 ; EPILOG-NEXT:    %sum.next = add i64 %sum, %add
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-NEXT:    br i1 undef, label %loop_latch.1, label %loop_exiting_bb1.1
 ; EPILOG:       loop_exiting_bb1.1:
 ; EPILOG-NEXT:    switch i64 %sum.next, label %loop_latch.1 [
@@ -852,7 +852,7 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG:       loop_latch.1:
 ; EPILOG-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
 ; EPILOG-NEXT:    %sum.next.1 = add i64 %sum.next, %add
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i64 %niter.next, 1
 ; EPILOG-NEXT:    br i1 undef, label %loop_latch.2, label %loop_exiting_bb1.2
 ; EPILOG:       loop_exiting_bb1.2:
 ; EPILOG-NEXT:    switch i64 %sum.next.1, label %loop_latch.2 [
@@ -862,7 +862,7 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG:       loop_latch.2:
 ; EPILOG-NEXT:    %iv_next.2 = add nuw nsw i64 %iv_next.1, 1
 ; EPILOG-NEXT:    %sum.next.2 = add i64 %sum.next.1, %add
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i64 %niter.next.1, 1
 ; EPILOG-NEXT:    br i1 undef, label %loop_latch.3, label %loop_exiting_bb1.3
 ; EPILOG:       loop_exiting_bb1.3:
 ; EPILOG-NEXT:    switch i64 %sum.next.2, label %loop_latch.3 [
@@ -872,7 +872,7 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG:       loop_latch.3:
 ; EPILOG-NEXT:    %iv_next.3 = add nuw nsw i64 %iv_next.2, 1
 ; EPILOG-NEXT:    %sum.next.3 = add i64 %sum.next.2, %add
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i64 %niter.next.2, 1
 ; EPILOG-NEXT:    br i1 undef, label %loop_latch.4, label %loop_exiting_bb1.4
 ; EPILOG:       loop_exiting_bb1.4:
 ; EPILOG-NEXT:    switch i64 %sum.next.3, label %loop_latch.4 [
@@ -882,7 +882,7 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG:       loop_latch.4:
 ; EPILOG-NEXT:    %iv_next.4 = add nuw nsw i64 %iv_next.3, 1
 ; EPILOG-NEXT:    %sum.next.4 = add i64 %sum.next.3, %add
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i64 %niter.next.3, 1
 ; EPILOG-NEXT:    br i1 undef, label %loop_latch.5, label %loop_exiting_bb1.5
 ; EPILOG:       loop_exiting_bb1.5:
 ; EPILOG-NEXT:    switch i64 %sum.next.4, label %loop_latch.5 [
@@ -892,7 +892,7 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG:       loop_latch.5:
 ; EPILOG-NEXT:    %iv_next.5 = add nuw nsw i64 %iv_next.4, 1
 ; EPILOG-NEXT:    %sum.next.5 = add i64 %sum.next.4, %add
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i64 %niter.next.4, 1
 ; EPILOG-NEXT:    br i1 undef, label %loop_latch.6, label %loop_exiting_bb1.6
 ; EPILOG:       loop_exiting_bb1.6:
 ; EPILOG-NEXT:    switch i64 %sum.next.5, label %loop_latch.6 [
@@ -902,7 +902,7 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG:       loop_latch.6:
 ; EPILOG-NEXT:    %iv_next.6 = add nuw nsw i64 %iv_next.5, 1
 ; EPILOG-NEXT:    %sum.next.6 = add i64 %sum.next.5, %add
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i64 %niter.next.5, 1
 ; EPILOG-NEXT:    br i1 undef, label %loop_latch.7, label %loop_exiting_bb1.7
 ; EPILOG:       loop_exiting_bb1.7:
 ; EPILOG-NEXT:    switch i64 %sum.next.6, label %loop_latch.7 [
@@ -912,8 +912,8 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG:       loop_latch.7:
 ; EPILOG-NEXT:    %iv_next.7 = add nuw nsw i64 %iv_next.6, 1
 ; EPILOG-NEXT:    %sum.next.7 = add i64 %sum.next.6, %add
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i64 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit2.loopexit.unr-lcssa.loopexit
 ; EPILOG:       exit1.loopexit:
 ; EPILOG-NEXT:    br label %exit1
@@ -935,7 +935,7 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG:       loop_header.epil:
 ; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
 ; EPILOG-NEXT:    %sum.epil = phi i64 [ %sum.unr, %loop_header.epil.preheader ], [ %sum.next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %loop_header.epil.preheader ], [ %epil.iter.next, %loop_latch.epil ]
 ; EPILOG-NEXT:    br i1 undef, label %loop_latch.epil, label %loop_exiting_bb1.epil
 ; EPILOG:       loop_exiting_bb1.epil:
 ; EPILOG-NEXT:    switch i64 %sum.epil, label %loop_latch.epil [
@@ -946,8 +946,8 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG-NEXT:    %iv_next.epil = add nuw nsw i64 %iv.epil, 1
 ; EPILOG-NEXT:    %sum.next.epil = add i64 %sum.epil, %add
 ; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit2.loopexit.epilog-lcssa, !llvm.loop !3
 ; EPILOG:       exit2.loopexit.epilog-lcssa:
 ; EPILOG-NEXT:    br label %exit2.loopexit
@@ -966,7 +966,7 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG-BLOCK:       loop_header:
 ; EPILOG-BLOCK-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %sum = phi i64 [ 0, %entry.new ], [ %sum.next.1, %loop_latch.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.1, %loop_latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    br i1 undef, label %loop_latch, label %loop_exiting_bb1
 ; EPILOG-BLOCK:       loop_exiting_bb1:
 ; EPILOG-BLOCK-NEXT:    switch i64 %sum, label %loop_latch [
@@ -980,7 +980,7 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG-BLOCK:       loop_latch:
 ; EPILOG-BLOCK-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
 ; EPILOG-BLOCK-NEXT:    %sum.next = add i64 %sum, %add
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 undef, label %loop_latch.1, label %loop_exiting_bb1.1
 ; EPILOG-BLOCK:       loop_exiting_bb1.1:
 ; EPILOG-BLOCK-NEXT:    switch i64 %sum.next, label %loop_latch.1 [
@@ -990,8 +990,8 @@ define void @test3(i64 %trip, i64 %add) {
 ; EPILOG-BLOCK:       loop_latch.1:
 ; EPILOG-BLOCK-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
 ; EPILOG-BLOCK-NEXT:    %sum.next.1 = add i64 %sum.next, %add
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.loopexit.unr-lcssa.loopexit, !llvm.loop !3
 ; EPILOG-BLOCK:       exit1.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit1
@@ -1029,7 +1029,7 @@ define void @test3(i64 %trip, i64 %add) {
 ; PROLOG:       loop_header.prol:
 ; PROLOG-NEXT:    %iv.prol = phi i64 [ 0, %loop_header.prol.preheader ], [ %iv_next.prol, %loop_latch.prol ]
 ; PROLOG-NEXT:    %sum.prol = phi i64 [ 0, %loop_header.prol.preheader ], [ %sum.next.prol, %loop_latch.prol ]
-; PROLOG-NEXT:    %prol.iter = phi i64 [ %xtraiter, %loop_header.prol.preheader ], [ %prol.iter.sub, %loop_latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ 0, %loop_header.prol.preheader ], [ %prol.iter.next, %loop_latch.prol ]
 ; PROLOG-NEXT:    br i1 undef, label %loop_latch.prol, label %loop_exiting_bb1.prol
 ; PROLOG:       loop_exiting_bb1.prol:
 ; PROLOG-NEXT:    switch i64 %sum.prol, label %loop_latch.prol [
@@ -1040,8 +1040,8 @@ define void @test3(i64 %trip, i64 %add) {
 ; PROLOG-NEXT:    %iv_next.prol = add nuw nsw i64 %iv.prol, 1
 ; PROLOG-NEXT:    %sum.next.prol = add i64 %sum.prol, %add
 ; PROLOG-NEXT:    %cmp.prol = icmp ne i64 %iv_next.prol, %trip
-; PROLOG-NEXT:    %prol.iter.sub = sub i64 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i64 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %loop_header.prol, label %loop_header.prol.loopexit.unr-lcssa, !llvm.loop !3
 ; PROLOG:       loop_header.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.prol, %loop_latch.prol ]
@@ -1254,7 +1254,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG:       header:
 ; EPILOG-NEXT:    %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.7, %latch.7 ]
 ; EPILOG-NEXT:    %sum.02 = phi i32 [ 0, %entry.new ], [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %latch.7 ]
 ; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block
 ; EPILOG:       for.exiting_block:
 ; EPILOG-NEXT:    %cmp = icmp eq i64 %n, 42
@@ -1264,7 +1264,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %2 = load i32, i32* %arrayidx, align 4
 ; EPILOG-NEXT:    %add = add nsw i32 %2, %sum.02
 ; EPILOG-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.1
 ; EPILOG:       for.exiting_block.1:
 ; EPILOG-NEXT:    %cmp.1 = icmp eq i64 %n, 42
@@ -1274,7 +1274,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
 ; EPILOG-NEXT:    %add.1 = add nsw i32 %3, %add
 ; EPILOG-NEXT:    %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i64 %niter.next, 1
 ; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.2
 ; EPILOG:       for.exiting_block.2:
 ; EPILOG-NEXT:    %cmp.2 = icmp eq i64 %n, 42
@@ -1284,7 +1284,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %4 = load i32, i32* %arrayidx.2, align 4
 ; EPILOG-NEXT:    %add.2 = add nsw i32 %4, %add.1
 ; EPILOG-NEXT:    %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i64 %niter.next.1, 1
 ; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.3
 ; EPILOG:       for.exiting_block.3:
 ; EPILOG-NEXT:    %cmp.3 = icmp eq i64 %n, 42
@@ -1294,7 +1294,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %5 = load i32, i32* %arrayidx.3, align 4
 ; EPILOG-NEXT:    %add.3 = add nsw i32 %5, %add.2
 ; EPILOG-NEXT:    %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i64 %niter.next.2, 1
 ; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.4
 ; EPILOG:       for.exiting_block.4:
 ; EPILOG-NEXT:    %cmp.4 = icmp eq i64 %n, 42
@@ -1304,7 +1304,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %6 = load i32, i32* %arrayidx.4, align 4
 ; EPILOG-NEXT:    %add.4 = add nsw i32 %6, %add.3
 ; EPILOG-NEXT:    %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i64 %niter.next.3, 1
 ; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.5
 ; EPILOG:       for.exiting_block.5:
 ; EPILOG-NEXT:    %cmp.5 = icmp eq i64 %n, 42
@@ -1314,7 +1314,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %7 = load i32, i32* %arrayidx.5, align 4
 ; EPILOG-NEXT:    %add.5 = add nsw i32 %7, %add.4
 ; EPILOG-NEXT:    %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i64 %niter.next.4, 1
 ; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.6
 ; EPILOG:       for.exiting_block.6:
 ; EPILOG-NEXT:    %cmp.6 = icmp eq i64 %n, 42
@@ -1324,7 +1324,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %8 = load i32, i32* %arrayidx.6, align 4
 ; EPILOG-NEXT:    %add.6 = add nsw i32 %8, %add.5
 ; EPILOG-NEXT:    %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i64 %niter.next.5, 1
 ; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.7
 ; EPILOG:       for.exiting_block.7:
 ; EPILOG-NEXT:    %cmp.7 = icmp eq i64 %n, 42
@@ -1334,8 +1334,8 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %9 = load i32, i32* %arrayidx.7, align 4
 ; EPILOG-NEXT:    %add.7 = add nsw i32 %9, %add.6
 ; EPILOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv.next.6, 1
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i64 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latchExit.unr-lcssa.loopexit, label %header
 ; EPILOG:       latchExit.unr-lcssa.loopexit:
 ; EPILOG-NEXT:    %result.ph.ph = phi i32 [ %add.7, %latch.7 ]
@@ -1353,7 +1353,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG:       header.epil:
 ; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
 ; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %header.epil.preheader ], [ %epil.iter.next, %latch.epil ]
 ; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit2, label %for.exiting_block.epil
 ; EPILOG:       for.exiting_block.epil:
 ; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
@@ -1364,8 +1364,8 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %add.epil = add nsw i32 %10, %sum.02.epil
 ; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
 ; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit2, !llvm.loop !4
 ; EPILOG:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-NEXT:    %result.ph1.ph = phi i32 [ 0, %header ], [ 0, %latch ], [ 0, %latch.1 ], [ 0, %latch.2 ], [ 0, %latch.3 ], [ 0, %latch.4 ], [ 0, %latch.5 ], [ 0, %latch.6 ]
@@ -1398,7 +1398,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK:       header:
 ; EPILOG-BLOCK-NEXT:    %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %sum.02 = phi i32 [ 0, %entry.new ], [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block
 ; EPILOG-BLOCK:       for.exiting_block:
 ; EPILOG-BLOCK-NEXT:    %cmp = icmp eq i64 %n, 42
@@ -1408,7 +1408,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %2 = load i32, i32* %arrayidx, align 4
 ; EPILOG-BLOCK-NEXT:    %add = add nsw i32 %2, %sum.02
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit, label %for.exiting_block.1
 ; EPILOG-BLOCK:       for.exiting_block.1:
 ; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
@@ -1418,8 +1418,8 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
 ; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %3, %add
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !4
 ; EPILOG-BLOCK:       latchExit.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ %add.1, %latch.1 ]
@@ -1469,7 +1469,7 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG:       header.prol:
 ; PROLOG-NEXT:    %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %latch.prol ], [ 0, %header.prol.preheader ]
 ; PROLOG-NEXT:    %sum.02.prol = phi i32 [ %add.prol, %latch.prol ], [ 0, %header.prol.preheader ]
-; PROLOG-NEXT:    %prol.iter = phi i64 [ %xtraiter, %header.prol.preheader ], [ %prol.iter.sub, %latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ 0, %header.prol.preheader ], [ %prol.iter.next, %latch.prol ]
 ; PROLOG-NEXT:    br i1 %cond, label %latchExit.unr-lcssa.loopexit1, label %for.exiting_block.prol
 ; PROLOG:       for.exiting_block.prol:
 ; PROLOG-NEXT:    %cmp.prol = icmp eq i64 %n, 42
@@ -1480,8 +1480,8 @@ define i32 @hdr_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-NEXT:    %add.prol = add nsw i32 %1, %sum.02.prol
 ; PROLOG-NEXT:    %indvars.iv.next.prol = add i64 %indvars.iv.prol, 1
 ; PROLOG-NEXT:    %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
-; PROLOG-NEXT:    %prol.iter.sub = sub i64 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i64 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %header.prol, label %header.prol.loopexit.unr-lcssa, !llvm.loop !4
 ; PROLOG:       header.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %result.unr.ph = phi i32 [ %add.prol, %latch.prol ]
@@ -1698,7 +1698,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG:       header:
 ; EPILOG-NEXT:    %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.7, %latch.7 ]
 ; EPILOG-NEXT:    %sum.02 = phi i32 [ 0, %entry.new ], [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %latch.7 ]
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block
 ; EPILOG:       for.exiting_block:
 ; EPILOG-NEXT:    %cmp = icmp eq i64 %n, 42
@@ -1708,7 +1708,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %2 = load i32, i32* %arrayidx, align 4
 ; EPILOG-NEXT:    %add = add nsw i32 %2, %sum.02
 ; EPILOG-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
 ; EPILOG:       for.exiting_block.1:
 ; EPILOG-NEXT:    %cmp.1 = icmp eq i64 %n, 42
@@ -1718,7 +1718,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
 ; EPILOG-NEXT:    %add.1 = add nsw i32 %3, %add
 ; EPILOG-NEXT:    %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i64 %niter.next, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2
 ; EPILOG:       for.exiting_block.2:
 ; EPILOG-NEXT:    %cmp.2 = icmp eq i64 %n, 42
@@ -1728,7 +1728,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %4 = load i32, i32* %arrayidx.2, align 4
 ; EPILOG-NEXT:    %add.2 = add nsw i32 %4, %add.1
 ; EPILOG-NEXT:    %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i64 %niter.next.1, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3
 ; EPILOG:       for.exiting_block.3:
 ; EPILOG-NEXT:    %cmp.3 = icmp eq i64 %n, 42
@@ -1738,7 +1738,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %5 = load i32, i32* %arrayidx.3, align 4
 ; EPILOG-NEXT:    %add.3 = add nsw i32 %5, %add.2
 ; EPILOG-NEXT:    %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i64 %niter.next.2, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4
 ; EPILOG:       for.exiting_block.4:
 ; EPILOG-NEXT:    %cmp.4 = icmp eq i64 %n, 42
@@ -1748,7 +1748,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %6 = load i32, i32* %arrayidx.4, align 4
 ; EPILOG-NEXT:    %add.4 = add nsw i32 %6, %add.3
 ; EPILOG-NEXT:    %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i64 %niter.next.3, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5
 ; EPILOG:       for.exiting_block.5:
 ; EPILOG-NEXT:    %cmp.5 = icmp eq i64 %n, 42
@@ -1758,7 +1758,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %7 = load i32, i32* %arrayidx.5, align 4
 ; EPILOG-NEXT:    %add.5 = add nsw i32 %7, %add.4
 ; EPILOG-NEXT:    %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i64 %niter.next.4, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6
 ; EPILOG:       for.exiting_block.6:
 ; EPILOG-NEXT:    %cmp.6 = icmp eq i64 %n, 42
@@ -1768,7 +1768,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %8 = load i32, i32* %arrayidx.6, align 4
 ; EPILOG-NEXT:    %add.6 = add nsw i32 %8, %add.5
 ; EPILOG-NEXT:    %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i64 %niter.next.5, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.7
 ; EPILOG:       for.exiting_block.7:
 ; EPILOG-NEXT:    %cmp.7 = icmp eq i64 %n, 42
@@ -1778,8 +1778,8 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %9 = load i32, i32* %arrayidx.7, align 4
 ; EPILOG-NEXT:    %add.7 = add nsw i32 %9, %add.6
 ; EPILOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv.next.6, 1
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i64 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latchExit.unr-lcssa.loopexit, label %header
 ; EPILOG:       latchExit.unr-lcssa.loopexit:
 ; EPILOG-NEXT:    %result.ph.ph = phi i32 [ %add.7, %latch.7 ]
@@ -1797,7 +1797,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG:       header.epil:
 ; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
 ; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %header.epil.preheader ], [ %epil.iter.next, %latch.epil ]
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit2, label %for.exiting_block.epil
 ; EPILOG:       for.exiting_block.epil:
 ; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
@@ -1808,8 +1808,8 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %add.epil = add nsw i32 %10, %sum.02.epil
 ; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
 ; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit3, !llvm.loop !5
 ; EPILOG:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-NEXT:    %result.ph1.ph = phi i32 [ 2, %for.exiting_block ], [ 2, %for.exiting_block.1 ], [ 2, %for.exiting_block.2 ], [ 2, %for.exiting_block.3 ], [ 2, %for.exiting_block.4 ], [ 2, %for.exiting_block.5 ], [ 2, %for.exiting_block.6 ], [ 2, %for.exiting_block.7 ]
@@ -1842,7 +1842,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK:       header:
 ; EPILOG-BLOCK-NEXT:    %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %sum.02 = phi i32 [ 0, %entry.new ], [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block
 ; EPILOG-BLOCK:       for.exiting_block:
 ; EPILOG-BLOCK-NEXT:    %cmp = icmp eq i64 %n, 42
@@ -1852,7 +1852,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %2 = load i32, i32* %arrayidx, align 4
 ; EPILOG-BLOCK-NEXT:    %add = add nsw i32 %2, %sum.02
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
 ; EPILOG-BLOCK:       for.exiting_block.1:
 ; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
@@ -1862,8 +1862,8 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
 ; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %3, %add
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !5
 ; EPILOG-BLOCK:       latchExit.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ %add.1, %latch.1 ]
@@ -1913,7 +1913,7 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG:       header.prol:
 ; PROLOG-NEXT:    %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %latch.prol ], [ 0, %header.prol.preheader ]
 ; PROLOG-NEXT:    %sum.02.prol = phi i32 [ %add.prol, %latch.prol ], [ 0, %header.prol.preheader ]
-; PROLOG-NEXT:    %prol.iter = phi i64 [ %xtraiter, %header.prol.preheader ], [ %prol.iter.sub, %latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ 0, %header.prol.preheader ], [ %prol.iter.next, %latch.prol ]
 ; PROLOG-NEXT:    br i1 %cond, label %for.exit2.loopexit1, label %for.exiting_block.prol
 ; PROLOG:       for.exiting_block.prol:
 ; PROLOG-NEXT:    %cmp.prol = icmp eq i64 %n, 42
@@ -1924,8 +1924,8 @@ define i32 @otherblock_latch_same_exit(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-NEXT:    %add.prol = add nsw i32 %1, %sum.02.prol
 ; PROLOG-NEXT:    %indvars.iv.next.prol = add i64 %indvars.iv.prol, 1
 ; PROLOG-NEXT:    %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
-; PROLOG-NEXT:    %prol.iter.sub = sub i64 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i64 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %header.prol, label %header.prol.loopexit.unr-lcssa, !llvm.loop !5
 ; PROLOG:       header.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %result.unr.ph = phi i32 [ %add.prol, %latch.prol ]
@@ -2143,7 +2143,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG:       header:
 ; EPILOG-NEXT:    %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.7, %latch.7 ]
 ; EPILOG-NEXT:    %sum.02 = phi i32 [ 0, %entry.new ], [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %latch.7 ]
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block
 ; EPILOG:       for.exiting_block:
 ; EPILOG-NEXT:    %cmp = icmp eq i64 %n, 42
@@ -2153,7 +2153,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %2 = load i32, i32* %arrayidx, align 4
 ; EPILOG-NEXT:    %add = add nsw i32 %2, %sum.02
 ; EPILOG-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
 ; EPILOG:       for.exiting_block.1:
 ; EPILOG-NEXT:    %cmp.1 = icmp eq i64 %n, 42
@@ -2163,7 +2163,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
 ; EPILOG-NEXT:    %add.1 = add nsw i32 %3, %add
 ; EPILOG-NEXT:    %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i64 %niter.next, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2
 ; EPILOG:       for.exiting_block.2:
 ; EPILOG-NEXT:    %cmp.2 = icmp eq i64 %n, 42
@@ -2173,7 +2173,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %4 = load i32, i32* %arrayidx.2, align 4
 ; EPILOG-NEXT:    %add.2 = add nsw i32 %4, %add.1
 ; EPILOG-NEXT:    %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i64 %niter.next.1, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3
 ; EPILOG:       for.exiting_block.3:
 ; EPILOG-NEXT:    %cmp.3 = icmp eq i64 %n, 42
@@ -2183,7 +2183,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %5 = load i32, i32* %arrayidx.3, align 4
 ; EPILOG-NEXT:    %add.3 = add nsw i32 %5, %add.2
 ; EPILOG-NEXT:    %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i64 %niter.next.2, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4
 ; EPILOG:       for.exiting_block.4:
 ; EPILOG-NEXT:    %cmp.4 = icmp eq i64 %n, 42
@@ -2193,7 +2193,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %6 = load i32, i32* %arrayidx.4, align 4
 ; EPILOG-NEXT:    %add.4 = add nsw i32 %6, %add.3
 ; EPILOG-NEXT:    %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i64 %niter.next.3, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5
 ; EPILOG:       for.exiting_block.5:
 ; EPILOG-NEXT:    %cmp.5 = icmp eq i64 %n, 42
@@ -2203,7 +2203,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %7 = load i32, i32* %arrayidx.5, align 4
 ; EPILOG-NEXT:    %add.5 = add nsw i32 %7, %add.4
 ; EPILOG-NEXT:    %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i64 %niter.next.4, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6
 ; EPILOG:       for.exiting_block.6:
 ; EPILOG-NEXT:    %cmp.6 = icmp eq i64 %n, 42
@@ -2213,7 +2213,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %8 = load i32, i32* %arrayidx.6, align 4
 ; EPILOG-NEXT:    %add.6 = add nsw i32 %8, %add.5
 ; EPILOG-NEXT:    %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i64 %niter.next.5, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.7
 ; EPILOG:       for.exiting_block.7:
 ; EPILOG-NEXT:    %cmp.7 = icmp eq i64 %n, 42
@@ -2223,8 +2223,8 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %9 = load i32, i32* %arrayidx.7, align 4
 ; EPILOG-NEXT:    %add.7 = add nsw i32 %9, %add.6
 ; EPILOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv.next.6, 1
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i64 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latchExit.unr-lcssa.loopexit, label %header
 ; EPILOG:       latchExit.unr-lcssa.loopexit:
 ; EPILOG-NEXT:    %result.ph.ph = phi i32 [ %add.7, %latch.7 ]
@@ -2242,7 +2242,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG:       header.epil:
 ; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
 ; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %header.epil.preheader ], [ %epil.iter.next, %latch.epil ]
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit2, label %for.exiting_block.epil
 ; EPILOG:       for.exiting_block.epil:
 ; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
@@ -2253,8 +2253,8 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %add.epil = add nsw i32 %10, %sum.02.epil
 ; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
 ; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit3, !llvm.loop !6
 ; EPILOG:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-NEXT:    %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %for.exiting_block.2 ], [ %add.2, %for.exiting_block.3 ], [ %add.3, %for.exiting_block.4 ], [ %add.4, %for.exiting_block.5 ], [ %add.5, %for.exiting_block.6 ], [ %add.6, %for.exiting_block.7 ]
@@ -2287,7 +2287,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK:       header:
 ; EPILOG-BLOCK-NEXT:    %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %sum.02 = phi i32 [ 0, %entry.new ], [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block
 ; EPILOG-BLOCK:       for.exiting_block:
 ; EPILOG-BLOCK-NEXT:    %cmp = icmp eq i64 %n, 42
@@ -2297,7 +2297,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %2 = load i32, i32* %arrayidx, align 4
 ; EPILOG-BLOCK-NEXT:    %add = add nsw i32 %2, %sum.02
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
 ; EPILOG-BLOCK:       for.exiting_block.1:
 ; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
@@ -2307,8 +2307,8 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %3 = load i32, i32* %arrayidx.1, align 4
 ; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %3, %add
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !6
 ; EPILOG-BLOCK:       latchExit.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ %add.1, %latch.1 ]
@@ -2358,7 +2358,7 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG:       header.prol:
 ; PROLOG-NEXT:    %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %latch.prol ], [ 0, %header.prol.preheader ]
 ; PROLOG-NEXT:    %sum.02.prol = phi i32 [ %add.prol, %latch.prol ], [ 0, %header.prol.preheader ]
-; PROLOG-NEXT:    %prol.iter = phi i64 [ %xtraiter, %header.prol.preheader ], [ %prol.iter.sub, %latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ 0, %header.prol.preheader ], [ %prol.iter.next, %latch.prol ]
 ; PROLOG-NEXT:    br i1 %cond, label %for.exit2.loopexit1, label %for.exiting_block.prol
 ; PROLOG:       for.exiting_block.prol:
 ; PROLOG-NEXT:    %cmp.prol = icmp eq i64 %n, 42
@@ -2369,8 +2369,8 @@ define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG-NEXT:    %add.prol = add nsw i32 %1, %sum.02.prol
 ; PROLOG-NEXT:    %indvars.iv.next.prol = add i64 %indvars.iv.prol, 1
 ; PROLOG-NEXT:    %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
-; PROLOG-NEXT:    %prol.iter.sub = sub i64 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i64 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %header.prol, label %header.prol.loopexit.unr-lcssa, !llvm.loop !6
 ; PROLOG:       header.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %result.unr.ph = phi i32 [ %add.prol, %latch.prol ]
@@ -2589,7 +2589,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG:       header:
 ; EPILOG-NEXT:    %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.7, %latch.7 ]
 ; EPILOG-NEXT:    %sum.02 = phi i32 [ 0, %entry.new ], [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %latch.7 ]
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block
 ; EPILOG:       for.exiting_block:
 ; EPILOG-NEXT:    %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
@@ -2599,7 +2599,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp, label %latchExit.epilog-lcssa.loopexit, label %latch
 ; EPILOG:       latch:
 ; EPILOG-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
 ; EPILOG:       for.exiting_block.1:
 ; EPILOG-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
@@ -2609,7 +2609,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1
 ; EPILOG:       latch.1:
 ; EPILOG-NEXT:    %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i64 %niter.next, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.2
 ; EPILOG:       for.exiting_block.2:
 ; EPILOG-NEXT:    %arrayidx.2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.1
@@ -2619,7 +2619,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.2, label %latchExit.epilog-lcssa.loopexit, label %latch.2
 ; EPILOG:       latch.2:
 ; EPILOG-NEXT:    %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i64 %niter.next.1, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.3
 ; EPILOG:       for.exiting_block.3:
 ; EPILOG-NEXT:    %arrayidx.3 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.2
@@ -2629,7 +2629,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.3, label %latchExit.epilog-lcssa.loopexit, label %latch.3
 ; EPILOG:       latch.3:
 ; EPILOG-NEXT:    %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i64 %niter.next.2, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.4
 ; EPILOG:       for.exiting_block.4:
 ; EPILOG-NEXT:    %arrayidx.4 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.3
@@ -2639,7 +2639,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.4, label %latchExit.epilog-lcssa.loopexit, label %latch.4
 ; EPILOG:       latch.4:
 ; EPILOG-NEXT:    %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i64 %niter.next.3, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.5
 ; EPILOG:       for.exiting_block.5:
 ; EPILOG-NEXT:    %arrayidx.5 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.4
@@ -2649,7 +2649,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.5, label %latchExit.epilog-lcssa.loopexit, label %latch.5
 ; EPILOG:       latch.5:
 ; EPILOG-NEXT:    %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i64 %niter.next.4, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.6
 ; EPILOG:       for.exiting_block.6:
 ; EPILOG-NEXT:    %arrayidx.6 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.5
@@ -2659,7 +2659,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.6, label %latchExit.epilog-lcssa.loopexit, label %latch.6
 ; EPILOG:       latch.6:
 ; EPILOG-NEXT:    %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i64 %niter.next.5, 1
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.7
 ; EPILOG:       for.exiting_block.7:
 ; EPILOG-NEXT:    %arrayidx.7 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next.6
@@ -2669,8 +2669,8 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cmp.7, label %latchExit.epilog-lcssa.loopexit, label %latch.7
 ; EPILOG:       latch.7:
 ; EPILOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv.next.6, 1
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i64 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latchExit.unr-lcssa.loopexit, label %header
 ; EPILOG:       latchExit.unr-lcssa.loopexit:
 ; EPILOG-NEXT:    %result.ph.ph = phi i32 [ %add.7, %latch.7 ]
@@ -2688,7 +2688,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG:       header.epil:
 ; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
 ; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %header.epil.preheader ], [ %epil.iter.next, %latch.epil ]
 ; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit2, label %for.exiting_block.epil
 ; EPILOG:       for.exiting_block.epil:
 ; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
@@ -2699,8 +2699,8 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG:       latch.epil:
 ; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
 ; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit3, !llvm.loop !7
 ; EPILOG:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-NEXT:    %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %for.exiting_block.2 ], [ %add.2, %for.exiting_block.3 ], [ %add.3, %for.exiting_block.4 ], [ %add.4, %for.exiting_block.5 ], [ %add.5, %for.exiting_block.6 ], [ %add.6, %for.exiting_block.7 ]
@@ -2733,7 +2733,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK:       header:
 ; EPILOG-BLOCK-NEXT:    %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %sum.02 = phi i32 [ 0, %entry.new ], [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block
 ; EPILOG-BLOCK:       for.exiting_block:
 ; EPILOG-BLOCK-NEXT:    %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
@@ -2743,7 +2743,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp, label %latchExit.epilog-lcssa.loopexit, label %latch
 ; EPILOG-BLOCK:       latch:
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2.loopexit, label %for.exiting_block.1
 ; EPILOG-BLOCK:       for.exiting_block.1:
 ; EPILOG-BLOCK-NEXT:    %arrayidx.1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
@@ -2753,8 +2753,8 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.1, label %latchExit.epilog-lcssa.loopexit, label %latch.1
 ; EPILOG-BLOCK:       latch.1:
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !7
 ; EPILOG-BLOCK:       latchExit.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ %add.1, %latch.1 ]
@@ -2804,7 +2804,7 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG:       header.prol:
 ; PROLOG-NEXT:    %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %latch.prol ], [ 0, %header.prol.preheader ]
 ; PROLOG-NEXT:    %sum.02.prol = phi i32 [ %add.prol, %latch.prol ], [ 0, %header.prol.preheader ]
-; PROLOG-NEXT:    %prol.iter = phi i64 [ %xtraiter, %header.prol.preheader ], [ %prol.iter.sub, %latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ 0, %header.prol.preheader ], [ %prol.iter.next, %latch.prol ]
 ; PROLOG-NEXT:    br i1 %cond, label %for.exit2.loopexit1, label %for.exiting_block.prol
 ; PROLOG:       for.exiting_block.prol:
 ; PROLOG-NEXT:    %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.prol
@@ -2815,8 +2815,8 @@ define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) {
 ; PROLOG:       latch.prol:
 ; PROLOG-NEXT:    %indvars.iv.next.prol = add i64 %indvars.iv.prol, 1
 ; PROLOG-NEXT:    %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
-; PROLOG-NEXT:    %prol.iter.sub = sub i64 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i64 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %header.prol, label %header.prol.loopexit.unr-lcssa, !llvm.loop !7
 ; PROLOG:       header.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %result.unr.ph = phi i32 [ %add.prol, %latch.prol ]
@@ -3078,7 +3078,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG:       loop_header:
 ; EPILOG-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    %sum = phi i64 [ 0, %entry.new ], [ %sum.next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch, label %loop_exiting
 ; EPILOG:       loop_exiting:
 ; EPILOG-NEXT:    %ivy = add i64 %iv, %add
@@ -3089,7 +3089,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG:       loop_latch:
 ; EPILOG-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
 ; EPILOG-NEXT:    %sum.next = add i64 %sum, %add
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.1, label %loop_exiting.1
 ; EPILOG:       loop_exiting.1:
 ; EPILOG-NEXT:    %ivy.1 = add i64 %iv_next, %add
@@ -3100,7 +3100,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG:       loop_latch.1:
 ; EPILOG-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
 ; EPILOG-NEXT:    %sum.next.1 = add i64 %sum.next, %add
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i64 %niter.next, 1
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.2, label %loop_exiting.2
 ; EPILOG:       loop_exiting.2:
 ; EPILOG-NEXT:    %ivy.2 = add i64 %iv_next.1, %add
@@ -3111,7 +3111,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG:       loop_latch.2:
 ; EPILOG-NEXT:    %iv_next.2 = add nuw nsw i64 %iv_next.1, 1
 ; EPILOG-NEXT:    %sum.next.2 = add i64 %sum.next.1, %add
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i64 %niter.next.1, 1
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.3, label %loop_exiting.3
 ; EPILOG:       loop_exiting.3:
 ; EPILOG-NEXT:    %ivy.3 = add i64 %iv_next.2, %add
@@ -3122,7 +3122,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG:       loop_latch.3:
 ; EPILOG-NEXT:    %iv_next.3 = add nuw nsw i64 %iv_next.2, 1
 ; EPILOG-NEXT:    %sum.next.3 = add i64 %sum.next.2, %add
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i64 %niter.next.2, 1
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.4, label %loop_exiting.4
 ; EPILOG:       loop_exiting.4:
 ; EPILOG-NEXT:    %ivy.4 = add i64 %iv_next.3, %add
@@ -3133,7 +3133,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG:       loop_latch.4:
 ; EPILOG-NEXT:    %iv_next.4 = add nuw nsw i64 %iv_next.3, 1
 ; EPILOG-NEXT:    %sum.next.4 = add i64 %sum.next.3, %add
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i64 %niter.next.3, 1
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.5, label %loop_exiting.5
 ; EPILOG:       loop_exiting.5:
 ; EPILOG-NEXT:    %ivy.5 = add i64 %iv_next.4, %add
@@ -3144,7 +3144,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG:       loop_latch.5:
 ; EPILOG-NEXT:    %iv_next.5 = add nuw nsw i64 %iv_next.4, 1
 ; EPILOG-NEXT:    %sum.next.5 = add i64 %sum.next.4, %add
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i64 %niter.next.4, 1
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.6, label %loop_exiting.6
 ; EPILOG:       loop_exiting.6:
 ; EPILOG-NEXT:    %ivy.6 = add i64 %iv_next.5, %add
@@ -3155,7 +3155,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG:       loop_latch.6:
 ; EPILOG-NEXT:    %iv_next.6 = add nuw nsw i64 %iv_next.5, 1
 ; EPILOG-NEXT:    %sum.next.6 = add i64 %sum.next.5, %add
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i64 %niter.next.5, 1
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.7, label %loop_exiting.7
 ; EPILOG:       loop_exiting.7:
 ; EPILOG-NEXT:    %ivy.7 = add i64 %iv_next.6, %add
@@ -3166,8 +3166,8 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG:       loop_latch.7:
 ; EPILOG-NEXT:    %iv_next.7 = add nuw nsw i64 %iv_next.6, 1
 ; EPILOG-NEXT:    %sum.next.7 = add i64 %sum.next.6, %add
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i64 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %latchexit.unr-lcssa.loopexit
 ; EPILOG:       exit1.loopexit:
 ; EPILOG-NEXT:    %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ], [ %ivy.2, %loop_exiting.2 ], [ %ivy.3, %loop_exiting.3 ], [ %ivy.3, %loop_exiting.3 ], [ %ivy.4, %loop_exiting.4 ], [ %ivy.4, %loop_exiting.4 ], [ %ivy.5, %loop_exiting.5 ], [ %ivy.5, %loop_exiting.5 ], [ %ivy.6, %loop_exiting.6 ], [ %ivy.6, %loop_exiting.6 ], [ %ivy.7, %loop_exiting.7 ], [ %ivy.7, %loop_exiting.7 ]
@@ -3194,7 +3194,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG:       loop_header.epil:
 ; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
 ; EPILOG-NEXT:    %sum.epil = phi i64 [ %sum.unr, %loop_header.epil.preheader ], [ %sum.next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %loop_header.epil.preheader ], [ %epil.iter.next, %loop_latch.epil ]
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %loop_exiting.epil
 ; EPILOG:       loop_exiting.epil:
 ; EPILOG-NEXT:    %ivy.epil = add i64 %iv.epil, %add
@@ -3206,8 +3206,8 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG-NEXT:    %iv_next.epil = add nuw nsw i64 %iv.epil, 1
 ; EPILOG-NEXT:    %sum.next.epil = add i64 %sum.epil, %add
 ; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %latchexit.epilog-lcssa, !llvm.loop !8
 ; EPILOG:       latchexit.epilog-lcssa:
 ; EPILOG-NEXT:    %sum.next.lcssa.ph1 = phi i64 [ %sum.next.epil, %loop_latch.epil ]
@@ -3228,7 +3228,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG-BLOCK:       loop_header:
 ; EPILOG-BLOCK-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %sum = phi i64 [ 0, %entry.new ], [ %sum.next.1, %loop_latch.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.1, %loop_latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch, label %loop_exiting
 ; EPILOG-BLOCK:       loop_exiting:
 ; EPILOG-BLOCK-NEXT:    %ivy = add i64 %iv, %add
@@ -3239,7 +3239,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG-BLOCK:       loop_latch:
 ; EPILOG-BLOCK-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
 ; EPILOG-BLOCK-NEXT:    %sum.next = add i64 %sum, %add
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.1, label %loop_exiting.1
 ; EPILOG-BLOCK:       loop_exiting.1:
 ; EPILOG-BLOCK-NEXT:    %ivy.1 = add i64 %iv_next, %add
@@ -3250,8 +3250,8 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG-BLOCK:       loop_latch.1:
 ; EPILOG-BLOCK-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
 ; EPILOG-BLOCK-NEXT:    %sum.next.1 = add i64 %sum.next, %add
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %latchexit.unr-lcssa.loopexit, !llvm.loop !8
 ; EPILOG-BLOCK:       exit1.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ]
@@ -3298,7 +3298,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; PROLOG:       loop_header.prol:
 ; PROLOG-NEXT:    %iv.prol = phi i64 [ 0, %loop_header.prol.preheader ], [ %iv_next.prol, %loop_latch.prol ]
 ; PROLOG-NEXT:    %sum.prol = phi i64 [ 0, %loop_header.prol.preheader ], [ %sum.next.prol, %loop_latch.prol ]
-; PROLOG-NEXT:    %prol.iter = phi i64 [ %xtraiter, %loop_header.prol.preheader ], [ %prol.iter.sub, %loop_latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ 0, %loop_header.prol.preheader ], [ %prol.iter.next, %loop_latch.prol ]
 ; PROLOG-NEXT:    br i1 %cond, label %loop_latch.prol, label %loop_exiting.prol
 ; PROLOG:       loop_exiting.prol:
 ; PROLOG-NEXT:    %ivy.prol = add i64 %iv.prol, %add
@@ -3310,8 +3310,8 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; PROLOG-NEXT:    %iv_next.prol = add nuw nsw i64 %iv.prol, 1
 ; PROLOG-NEXT:    %sum.next.prol = add i64 %sum.prol, %add
 ; PROLOG-NEXT:    %cmp.prol = icmp ne i64 %iv_next.prol, %trip
-; PROLOG-NEXT:    %prol.iter.sub = sub i64 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i64 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %loop_header.prol, label %loop_header.prol.loopexit.unr-lcssa, !llvm.loop !8
 ; PROLOG:       loop_header.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.prol, %loop_latch.prol ]
@@ -3534,7 +3534,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG:       header:
 ; EPILOG-NEXT:    %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.7, %latch.7 ]
 ; EPILOG-NEXT:    %sum.02 = phi i32 [ 0, %entry.new ], [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %latch.7 ]
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block
 ; EPILOG:       for.exiting_block:
 ; EPILOG-NEXT:    %cmp = icmp eq i64 %n, 42
@@ -3544,7 +3544,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-NEXT:    %load = load i32, i32* %arrayidx, align 4
 ; EPILOG-NEXT:    %add = add nsw i32 %load, %sum.02
 ; EPILOG-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1
 ; EPILOG:       for.exiting_block.1:
 ; EPILOG-NEXT:    %cmp.1 = icmp eq i64 %n, 42
@@ -3554,7 +3554,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-NEXT:    %load.1 = load i32, i32* %arrayidx.1, align 4
 ; EPILOG-NEXT:    %add.1 = add nsw i32 %load.1, %add
 ; EPILOG-NEXT:    %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i64 %niter.next, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.2
 ; EPILOG:       for.exiting_block.2:
 ; EPILOG-NEXT:    %cmp.2 = icmp eq i64 %n, 42
@@ -3564,7 +3564,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-NEXT:    %load.2 = load i32, i32* %arrayidx.2, align 4
 ; EPILOG-NEXT:    %add.2 = add nsw i32 %load.2, %add.1
 ; EPILOG-NEXT:    %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv.next.1, 1
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i64 %niter.next.1, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.3
 ; EPILOG:       for.exiting_block.3:
 ; EPILOG-NEXT:    %cmp.3 = icmp eq i64 %n, 42
@@ -3574,7 +3574,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-NEXT:    %load.3 = load i32, i32* %arrayidx.3, align 4
 ; EPILOG-NEXT:    %add.3 = add nsw i32 %load.3, %add.2
 ; EPILOG-NEXT:    %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv.next.2, 1
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i64 %niter.next.2, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.4
 ; EPILOG:       for.exiting_block.4:
 ; EPILOG-NEXT:    %cmp.4 = icmp eq i64 %n, 42
@@ -3584,7 +3584,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-NEXT:    %load.4 = load i32, i32* %arrayidx.4, align 4
 ; EPILOG-NEXT:    %add.4 = add nsw i32 %load.4, %add.3
 ; EPILOG-NEXT:    %indvars.iv.next.4 = add nuw nsw i64 %indvars.iv.next.3, 1
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i64 %niter.next.3, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.5
 ; EPILOG:       for.exiting_block.5:
 ; EPILOG-NEXT:    %cmp.5 = icmp eq i64 %n, 42
@@ -3594,7 +3594,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-NEXT:    %load.5 = load i32, i32* %arrayidx.5, align 4
 ; EPILOG-NEXT:    %add.5 = add nsw i32 %load.5, %add.4
 ; EPILOG-NEXT:    %indvars.iv.next.5 = add nuw nsw i64 %indvars.iv.next.4, 1
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i64 %niter.next.4, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.6
 ; EPILOG:       for.exiting_block.6:
 ; EPILOG-NEXT:    %cmp.6 = icmp eq i64 %n, 42
@@ -3604,7 +3604,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-NEXT:    %load.6 = load i32, i32* %arrayidx.6, align 4
 ; EPILOG-NEXT:    %add.6 = add nsw i32 %load.6, %add.5
 ; EPILOG-NEXT:    %indvars.iv.next.6 = add nuw nsw i64 %indvars.iv.next.5, 1
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i64 %niter.next.5, 1
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.7
 ; EPILOG:       for.exiting_block.7:
 ; EPILOG-NEXT:    %cmp.7 = icmp eq i64 %n, 42
@@ -3614,8 +3614,8 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-NEXT:    %load.7 = load i32, i32* %arrayidx.7, align 4
 ; EPILOG-NEXT:    %add.7 = add nsw i32 %load.7, %add.6
 ; EPILOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv.next.6, 1
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i64 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latch_exit.unr-lcssa.loopexit, label %header
 ; EPILOG:       latch_exit.unr-lcssa.loopexit:
 ; EPILOG-NEXT:    %sum.0.lcssa.ph.ph = phi i32 [ %add.7, %latch.7 ]
@@ -3633,7 +3633,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG:       header.epil:
 ; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
 ; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %header.epil.preheader ], [ %epil.iter.next, %latch.epil ]
 ; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit2, label %for.exiting_block.epil
 ; EPILOG:       for.exiting_block.epil:
 ; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
@@ -3644,8 +3644,8 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-NEXT:    %add.epil = add nsw i32 %load.epil, %sum.02.epil
 ; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
 ; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latch_exit.epilog-lcssa, !llvm.loop !9
 ; EPILOG:       latch_exit.epilog-lcssa:
 ; EPILOG-NEXT:    %sum.0.lcssa.ph1 = phi i32 [ %add.epil, %latch.epil ]
@@ -3680,7 +3680,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-BLOCK:       header:
 ; EPILOG-BLOCK-NEXT:    %indvars.iv = phi i64 [ 0, %entry.new ], [ %indvars.iv.next.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %sum.02 = phi i32 [ 0, %entry.new ], [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block
 ; EPILOG-BLOCK:       for.exiting_block:
 ; EPILOG-BLOCK-NEXT:    %cmp = icmp eq i64 %n, 42
@@ -3690,7 +3690,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-BLOCK-NEXT:    %load = load i32, i32* %arrayidx, align 4
 ; EPILOG-BLOCK-NEXT:    %add = add nsw i32 %load, %sum.02
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %for.exit2.loopexit, label %for.exiting_block.1
 ; EPILOG-BLOCK:       for.exiting_block.1:
 ; EPILOG-BLOCK-NEXT:    %cmp.1 = icmp eq i64 %n, 42
@@ -3700,8 +3700,8 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-BLOCK-NEXT:    %load.1 = load i32, i32* %arrayidx.1, align 4
 ; EPILOG-BLOCK-NEXT:    %add.1 = add nsw i32 %load.1, %add
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv.next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latch_exit.unr-lcssa.loopexit, label %header, !llvm.loop !9
 ; EPILOG-BLOCK:       latch_exit.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %sum.0.lcssa.ph.ph = phi i32 [ %add.1, %latch.1 ]
@@ -3752,7 +3752,7 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; PROLOG:       header.prol:
 ; PROLOG-NEXT:    %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %latch.prol ], [ 0, %header.prol.preheader ]
 ; PROLOG-NEXT:    %sum.02.prol = phi i32 [ %add.prol, %latch.prol ], [ 0, %header.prol.preheader ]
-; PROLOG-NEXT:    %prol.iter = phi i64 [ %xtraiter, %header.prol.preheader ], [ %prol.iter.sub, %latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ 0, %header.prol.preheader ], [ %prol.iter.next, %latch.prol ]
 ; PROLOG-NEXT:    br i1 false, label %for.exit2.loopexit1, label %for.exiting_block.prol
 ; PROLOG:       for.exiting_block.prol:
 ; PROLOG-NEXT:    %cmp.prol = icmp eq i64 %n, 42
@@ -3763,8 +3763,8 @@ define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; PROLOG-NEXT:    %add.prol = add nsw i32 %load.prol, %sum.02.prol
 ; PROLOG-NEXT:    %indvars.iv.next.prol = add i64 %indvars.iv.prol, 1
 ; PROLOG-NEXT:    %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
-; PROLOG-NEXT:    %prol.iter.sub = sub i64 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i64 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %header.prol, label %header.prol.loopexit.unr-lcssa, !llvm.loop !9
 ; PROLOG:       header.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %sum.0.lcssa.unr.ph = phi i32 [ %add.prol, %latch.prol ]
@@ -3998,40 +3998,40 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; EPILOG-NEXT:    br label %header
 ; EPILOG:       header:
 ; EPILOG-NEXT:    %i6 = phi i64 [ 1, %preheader.new ], [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %preheader.new ], [ %niter.nsub.7, %latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ 0, %preheader.new ], [ %niter.next.7, %latch.7 ]
 ; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch
 ; EPILOG:       latch:
 ; EPILOG-NEXT:    %add = add nuw nsw i64 %i6, 1
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.1
 ; EPILOG:       latch.1:
 ; EPILOG-NEXT:    %add.1 = add nuw nsw i64 %add, 1
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i64 %niter.next, 1
 ; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.2
 ; EPILOG:       latch.2:
 ; EPILOG-NEXT:    %add.2 = add nuw nsw i64 %add.1, 1
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i64 %niter.next.1, 1
 ; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.3
 ; EPILOG:       latch.3:
 ; EPILOG-NEXT:    %add.3 = add nuw nsw i64 %add.2, 1
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i64 %niter.next.2, 1
 ; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.4
 ; EPILOG:       latch.4:
 ; EPILOG-NEXT:    %add.4 = add nuw nsw i64 %add.3, 1
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i64 %niter.next.3, 1
 ; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.5
 ; EPILOG:       latch.5:
 ; EPILOG-NEXT:    %add.5 = add nuw nsw i64 %add.4, 1
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i64 %niter.next.4, 1
 ; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.6
 ; EPILOG:       latch.6:
 ; EPILOG-NEXT:    %add.6 = add nuw nsw i64 %add.5, 1
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i64 %niter.next.5, 1
 ; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.7
 ; EPILOG:       latch.7:
 ; EPILOG-NEXT:    %add.7 = add nuw nsw i64 %add.6, 1
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i64 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %header, label %latchexit.unr-lcssa.loopexit
 ; EPILOG:       latchexit.unr-lcssa.loopexit:
 ; EPILOG-NEXT:    %i6.unr.ph = phi i64 [ %add.7, %latch.7 ]
@@ -4044,13 +4044,13 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; EPILOG-NEXT:    br label %header.epil
 ; EPILOG:       header.epil:
 ; EPILOG-NEXT:    %i6.epil = phi i64 [ %i6.unr, %header.epil.preheader ], [ %add.epil, %latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %header.epil.preheader ], [ %epil.iter.next, %latch.epil ]
 ; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit1, label %latch.epil
 ; EPILOG:       latch.epil:
 ; EPILOG-NEXT:    %add.epil = add nuw nsw i64 %i6.epil, 1
 ; EPILOG-NEXT:    %i9.epil = icmp slt i64 %add.epil, %sext
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchexit.epilog-lcssa, !llvm.loop !10
 ; EPILOG:       latchexit.epilog-lcssa:
 ; EPILOG-NEXT:    br label %latchexit
@@ -4085,16 +4085,16 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; EPILOG-BLOCK-NEXT:    br label %header
 ; EPILOG-BLOCK:       header:
 ; EPILOG-BLOCK-NEXT:    %i6 = phi i64 [ 1, %preheader.new ], [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %preheader.new ], [ %niter.nsub.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ 0, %preheader.new ], [ %niter.next.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch
 ; EPILOG-BLOCK:       latch:
 ; EPILOG-BLOCK-NEXT:    %add = add nuw nsw i64 %i6, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %loopexit1.loopexit, label %latch.1
 ; EPILOG-BLOCK:       latch.1:
 ; EPILOG-BLOCK-NEXT:    %add.1 = add nuw nsw i64 %add, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %header, label %latchexit.unr-lcssa.loopexit, !llvm.loop !10
 ; EPILOG-BLOCK:       latchexit.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %latchexit.unr-lcssa
@@ -4134,13 +4134,13 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; PROLOG-NEXT:    br label %header.prol
 ; PROLOG:       header.prol:
 ; PROLOG-NEXT:    %i6.prol = phi i64 [ 1, %header.prol.preheader ], [ %add.prol, %latch.prol ]
-; PROLOG-NEXT:    %prol.iter = phi i64 [ %xtraiter, %header.prol.preheader ], [ %prol.iter.sub, %latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ 0, %header.prol.preheader ], [ %prol.iter.next, %latch.prol ]
 ; PROLOG-NEXT:    br i1 false, label %loopexit1.loopexit1, label %latch.prol
 ; PROLOG:       latch.prol:
 ; PROLOG-NEXT:    %add.prol = add nuw nsw i64 %i6.prol, 1
 ; PROLOG-NEXT:    %i9.prol = icmp slt i64 %add.prol, %sext
-; PROLOG-NEXT:    %prol.iter.sub = sub i64 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i64 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %header.prol, label %header.prol.loopexit.unr-lcssa, !llvm.loop !10
 ; PROLOG:       header.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %i6.unr.ph = phi i64 [ %add.prol, %latch.prol ]
@@ -4296,40 +4296,40 @@ define void @test8() {
 ; EPILOG-NEXT:    br label %innerH
 ; EPILOG:       innerH:
 ; EPILOG-NEXT:    %i3 = phi i64 [ %i, %outerloop.new ], [ %i4.7, %latch.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %outerloop.new ], [ %niter.nsub.7, %latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ 0, %outerloop.new ], [ %niter.next.7, %latch.7 ]
 ; EPILOG-NEXT:    %i4 = add nuw nsw i64 %i3, 1
 ; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch
 ; EPILOG:       latch:
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-NEXT:    %i4.1 = add nuw nsw i64 %i4, 1
 ; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.1
 ; EPILOG:       latch.1:
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i64 %niter.next, 1
 ; EPILOG-NEXT:    %i4.2 = add nuw nsw i64 %i4.1, 1
 ; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.2
 ; EPILOG:       latch.2:
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i64 %niter.next.1, 1
 ; EPILOG-NEXT:    %i4.3 = add nuw nsw i64 %i4.2, 1
 ; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.3
 ; EPILOG:       latch.3:
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i64 %niter.next.2, 1
 ; EPILOG-NEXT:    %i4.4 = add nuw nsw i64 %i4.3, 1
 ; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.4
 ; EPILOG:       latch.4:
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i64 %niter.next.3, 1
 ; EPILOG-NEXT:    %i4.5 = add nuw nsw i64 %i4.4, 1
 ; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.5
 ; EPILOG:       latch.5:
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i64 %niter.next.4, 1
 ; EPILOG-NEXT:    %i4.6 = add nuw nsw i64 %i4.5, 1
 ; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.6
 ; EPILOG:       latch.6:
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i64 %niter.next.5, 1
 ; EPILOG-NEXT:    %i4.7 = add nuw nsw i64 %i4.6, 1
 ; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.7
 ; EPILOG:       latch.7:
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i64 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %innerH, label %exit.unr-lcssa.loopexit
 ; EPILOG:       exit.unr-lcssa.loopexit:
 ; EPILOG-NEXT:    %i3.unr.ph = phi i64 [ %i4.7, %latch.7 ]
@@ -4342,13 +4342,13 @@ define void @test8() {
 ; EPILOG-NEXT:    br label %innerH.epil
 ; EPILOG:       innerH.epil:
 ; EPILOG-NEXT:    %i3.epil = phi i64 [ %i4.epil, %latch.epil ], [ %i3.unr, %innerH.epil.preheader ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %innerH.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %innerH.epil.preheader ], [ %epil.iter.next, %latch.epil ]
 ; EPILOG-NEXT:    %i4.epil = add nuw nsw i64 %i3.epil, 1
 ; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit1, label %latch.epil
 ; EPILOG:       latch.epil:
 ; EPILOG-NEXT:    %i6.epil = icmp ult i64 %i4.epil, 100
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %innerH.epil, label %exit.epilog-lcssa, !llvm.loop !11
 ; EPILOG:       exit.epilog-lcssa:
 ; EPILOG-NEXT:    br label %exit
@@ -4368,16 +4368,16 @@ define void @test8() {
 ; EPILOG-BLOCK-NEXT:    br label %innerH.1
 ; EPILOG-BLOCK:       innerH.1:
 ; EPILOG-BLOCK-NEXT:    %i3.1 = phi i64 [ 0, %outerloop.new.1 ], [ %i4.1.1, %latch.1.1 ]
-; EPILOG-BLOCK-NEXT:    %niter.1 = phi i64 [ 100, %outerloop.new.1 ], [ %niter.nsub.1.1, %latch.1.1 ]
-; EPILOG-BLOCK-NEXT:    %i4.11 = add nuw nsw i64 %i3.1, 1
-; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.13
-; EPILOG-BLOCK:       latch.13:
-; EPILOG-BLOCK-NEXT:    %niter.nsub.12 = sub i64 %niter.1, 1
-; EPILOG-BLOCK-NEXT:    %i4.1.1 = add nuw nsw i64 %i4.11, 1
+; EPILOG-BLOCK-NEXT:    %niter.1 = phi i64 [ 0, %outerloop.new.1 ], [ %niter.next.1.1, %latch.1.1 ]
+; EPILOG-BLOCK-NEXT:    %i4.12 = add nuw nsw i64 %i3.1, 1
+; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.14
+; EPILOG-BLOCK:       latch.14:
+; EPILOG-BLOCK-NEXT:    %niter.next.13 = add nuw nsw i64 %niter.1, 1
+; EPILOG-BLOCK-NEXT:    %i4.1.1 = add nuw nsw i64 %i4.12, 1
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.1.1
 ; EPILOG-BLOCK:       latch.1.1:
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1.1 = sub i64 %niter.nsub.12, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1.1 = icmp ne i64 %niter.nsub.1.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1.1 = add i64 %niter.next.13, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1.1 = icmp ne i64 %niter.next.1.1, 100
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1.1, label %innerH.1, label %exit.unr-lcssa.loopexit.1, !llvm.loop !11
 ; EPILOG-BLOCK:       exit.unr-lcssa.loopexit.1:
 ; EPILOG-BLOCK-NEXT:    br label %exit.unr-lcssa.1
@@ -4403,16 +4403,16 @@ define void @test8() {
 ; EPILOG-BLOCK-NEXT:    br label %innerH
 ; EPILOG-BLOCK:       innerH:
 ; EPILOG-BLOCK-NEXT:    %i3 = phi i64 [ %i, %outerloop.new ], [ %i4.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %outerloop.new ], [ %niter.nsub.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ 0, %outerloop.new ], [ %niter.next.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %i4 = add nuw nsw i64 %i3, 1
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch
 ; EPILOG-BLOCK:       latch:
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    %i4.1 = add nuw nsw i64 %i4, 1
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.1
 ; EPILOG-BLOCK:       latch.1:
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %innerH, label %exit.unr-lcssa.loopexit, !llvm.loop !11
 ; EPILOG-BLOCK:       exit.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit.unr-lcssa
@@ -4450,13 +4450,13 @@ define void @test8() {
 ; PROLOG-NEXT:    br label %innerH.prol
 ; PROLOG:       innerH.prol:
 ; PROLOG-NEXT:    %i3.prol = phi i64 [ %i4.prol, %latch.prol ], [ %i, %innerH.prol.preheader ]
-; PROLOG-NEXT:    %prol.iter = phi i64 [ %xtraiter, %innerH.prol.preheader ], [ %prol.iter.sub, %latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ 0, %innerH.prol.preheader ], [ %prol.iter.next, %latch.prol ]
 ; PROLOG-NEXT:    %i4.prol = add nuw nsw i64 %i3.prol, 1
 ; PROLOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit1, label %latch.prol
 ; PROLOG:       latch.prol:
 ; PROLOG-NEXT:    %i6.prol = icmp ult i64 %i4.prol, 100
-; PROLOG-NEXT:    %prol.iter.sub = sub i64 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i64 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %innerH.prol, label %innerH.prol.loopexit.unr-lcssa, !llvm.loop !11
 ; PROLOG:       innerH.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %i3.unr.ph = phi i64 [ %i4.prol, %latch.prol ]
@@ -4620,7 +4620,7 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; EPILOG-NEXT:    br label %header
 ; EPILOG:       header:
 ; EPILOG-NEXT:    %phi = phi i64 [ %i4, %preheader.new ], [ %iv.next.7, %latch.7 ]
-; EPILOG-NEXT:    %niter = phi i32 [ %unroll_iter, %preheader.new ], [ %niter.nsub.7, %latch.7 ]
+; EPILOG-NEXT:    %niter = phi i32 [ 0, %preheader.new ], [ %niter.next.7, %latch.7 ]
 ; EPILOG-NEXT:    br i1 true, label %latch, label %innerexit.loopexit
 ; EPILOG:       innerexit.loopexit:
 ; EPILOG-NEXT:    %trip.lcssa.ph = phi i32 [ %trip, %header ], [ %trip, %latch ], [ %trip, %latch.1 ], [ %trip, %latch.2 ], [ %trip, %latch.3 ], [ %trip, %latch.4 ], [ %trip, %latch.5 ], [ %trip, %latch.6 ]
@@ -4634,36 +4634,36 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; EPILOG-NEXT:    ret i8 addrspace(1)* %i9
 ; EPILOG:       latch:
 ; EPILOG-NEXT:    %iv.next = add nuw nsw i64 %phi, 1
-; EPILOG-NEXT:    %niter.nsub = sub i32 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i32 %niter, 1
 ; EPILOG-NEXT:    br i1 true, label %latch.1, label %innerexit.loopexit
 ; EPILOG:       latch.1:
 ; EPILOG-NEXT:    %iv.next.1 = add nuw nsw i64 %iv.next, 1
-; EPILOG-NEXT:    %niter.nsub.1 = sub i32 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i32 %niter.next, 1
 ; EPILOG-NEXT:    br i1 true, label %latch.2, label %innerexit.loopexit
 ; EPILOG:       latch.2:
 ; EPILOG-NEXT:    %iv.next.2 = add nuw nsw i64 %iv.next.1, 1
-; EPILOG-NEXT:    %niter.nsub.2 = sub i32 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i32 %niter.next.1, 1
 ; EPILOG-NEXT:    br i1 true, label %latch.3, label %innerexit.loopexit
 ; EPILOG:       latch.3:
 ; EPILOG-NEXT:    %iv.next.3 = add nuw nsw i64 %iv.next.2, 1
-; EPILOG-NEXT:    %niter.nsub.3 = sub i32 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i32 %niter.next.2, 1
 ; EPILOG-NEXT:    br i1 true, label %latch.4, label %innerexit.loopexit
 ; EPILOG:       latch.4:
 ; EPILOG-NEXT:    %iv.next.4 = add nuw nsw i64 %iv.next.3, 1
-; EPILOG-NEXT:    %niter.nsub.4 = sub i32 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i32 %niter.next.3, 1
 ; EPILOG-NEXT:    br i1 true, label %latch.5, label %innerexit.loopexit
 ; EPILOG:       latch.5:
 ; EPILOG-NEXT:    %iv.next.5 = add nuw nsw i64 %iv.next.4, 1
-; EPILOG-NEXT:    %niter.nsub.5 = sub i32 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i32 %niter.next.4, 1
 ; EPILOG-NEXT:    br i1 true, label %latch.6, label %innerexit.loopexit
 ; EPILOG:       latch.6:
 ; EPILOG-NEXT:    %iv.next.6 = add nuw nsw i64 %iv.next.5, 1
-; EPILOG-NEXT:    %niter.nsub.6 = sub i32 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i32 %niter.next.5, 1
 ; EPILOG-NEXT:    br i1 true, label %latch.7, label %innerexit.loopexit
 ; EPILOG:       latch.7:
 ; EPILOG-NEXT:    %iv.next.7 = add nuw nsw i64 %iv.next.6, 1
-; EPILOG-NEXT:    %niter.nsub.7 = sub i32 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i32 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i32 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i32 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %header, label %outerLatch.loopexit.unr-lcssa.loopexit
 ; EPILOG:       outerLatch.loopexit.unr-lcssa.loopexit:
 ; EPILOG-NEXT:    %phi.unr.ph = phi i64 [ %iv.next.7, %latch.7 ]
@@ -4676,15 +4676,15 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; EPILOG-NEXT:    br label %header.epil
 ; EPILOG:       header.epil:
 ; EPILOG-NEXT:    %phi.epil = phi i64 [ %phi.unr, %header.epil.preheader ], [ %iv.next.epil, %latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i32 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i32 [ 0, %header.epil.preheader ], [ %epil.iter.next, %latch.epil ]
 ; EPILOG-NEXT:    %i7.epil = trunc i64 %phi.epil to i32
 ; EPILOG-NEXT:    br i1 true, label %latch.epil, label %innerexit.loopexit1
 ; EPILOG:       latch.epil:
 ; EPILOG-NEXT:    %i11.epil = add nsw i32 %i7.epil, 1
 ; EPILOG-NEXT:    %innercnd.epil = icmp slt i32 %i11.epil, %trip
 ; EPILOG-NEXT:    %iv.next.epil = add nuw nsw i64 %phi.epil, 1
-; EPILOG-NEXT:    %epil.iter.sub = sub i32 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i32 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i32 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i32 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %outerLatch.loopexit.epilog-lcssa, !llvm.loop !12
 ; EPILOG:       outerLatch.loopexit.epilog-lcssa:
 ; EPILOG-NEXT:    br label %outerLatch.loopexit
@@ -4711,7 +4711,7 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; EPILOG-BLOCK-NEXT:    br label %header
 ; EPILOG-BLOCK:       header:
 ; EPILOG-BLOCK-NEXT:    %phi = phi i64 [ 0, %preheader.new ], [ %iv.next.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i32 [ %unroll_iter, %preheader.new ], [ %niter.nsub.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i32 [ 0, %preheader.new ], [ %niter.next.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    br i1 true, label %latch, label %innerexit.loopexit.loopexit
 ; EPILOG-BLOCK:       innerexit.loopexit.loopexit:
 ; EPILOG-BLOCK-NEXT:    %trip.lcssa.ph.ph = phi i32 [ %trip, %latch ], [ %trip, %header ]
@@ -4731,12 +4731,12 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; EPILOG-BLOCK-NEXT:    ret i8 addrspace(1)* %i9
 ; EPILOG-BLOCK:       latch:
 ; EPILOG-BLOCK-NEXT:    %iv.next = add nuw nsw i64 %phi, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i32 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i32 %niter, 1
 ; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.1, label %innerexit.loopexit.loopexit
 ; EPILOG-BLOCK:       latch.1:
 ; EPILOG-BLOCK-NEXT:    %iv.next.1 = add nuw nsw i64 %iv.next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i32 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i32 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i32 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i32 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %header, label %outerLatch.loopexit.unr-lcssa.loopexit, !llvm.loop !13
 ; EPILOG-BLOCK:       outerLatch.loopexit.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %outerLatch.loopexit.unr-lcssa
@@ -4764,16 +4764,16 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; EPILOG-BLOCK-NEXT:    br label %header.1
 ; EPILOG-BLOCK:       header.1:
 ; EPILOG-BLOCK-NEXT:    %phi.1 = phi i64 [ 0, %preheader.new.1 ], [ %iv.next.1.1, %latch.1.1 ]
-; EPILOG-BLOCK-NEXT:    %niter.1 = phi i32 [ %unroll_iter.1, %preheader.new.1 ], [ %niter.nsub.1.1, %latch.1.1 ]
+; EPILOG-BLOCK-NEXT:    %niter.1 = phi i32 [ 0, %preheader.new.1 ], [ %niter.next.1.1, %latch.1.1 ]
 ; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.15, label %innerexit.loopexit.loopexit6
 ; EPILOG-BLOCK:       latch.15:
 ; EPILOG-BLOCK-NEXT:    %iv.next.13 = add nuw nsw i64 %phi.1, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.14 = sub i32 %niter.1, 1
+; EPILOG-BLOCK-NEXT:    %niter.next.14 = add nuw nsw i32 %niter.1, 1
 ; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.1.1, label %innerexit.loopexit.loopexit6
 ; EPILOG-BLOCK:       latch.1.1:
 ; EPILOG-BLOCK-NEXT:    %iv.next.1.1 = add nuw nsw i64 %iv.next.13, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1.1 = sub i32 %niter.nsub.14, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1.1 = icmp ne i32 %niter.nsub.1.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1.1 = add i32 %niter.next.14, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1.1 = icmp ne i32 %niter.next.1.1, %unroll_iter.1
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1.1, label %header.1, label %outerLatch.loopexit.unr-lcssa.loopexit.1, !llvm.loop !13
 ; EPILOG-BLOCK:       outerLatch.loopexit.unr-lcssa.loopexit.1:
 ; EPILOG-BLOCK-NEXT:    br label %outerLatch.loopexit.unr-lcssa.1
@@ -4809,15 +4809,15 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; PROLOG-NEXT:    br label %header.prol
 ; PROLOG:       header.prol:
 ; PROLOG-NEXT:    %phi.prol = phi i64 [ %i4, %header.prol.preheader ], [ %iv.next.prol, %latch.prol ]
-; PROLOG-NEXT:    %prol.iter = phi i32 [ %xtraiter, %header.prol.preheader ], [ %prol.iter.sub, %latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i32 [ 0, %header.prol.preheader ], [ %prol.iter.next, %latch.prol ]
 ; PROLOG-NEXT:    %i7.prol = trunc i64 %phi.prol to i32
 ; PROLOG-NEXT:    br i1 true, label %latch.prol, label %innerexit.loopexit1
 ; PROLOG:       latch.prol:
 ; PROLOG-NEXT:    %i11.prol = add nsw i32 %i7.prol, 1
 ; PROLOG-NEXT:    %innercnd.prol = icmp slt i32 %i11.prol, %trip
 ; PROLOG-NEXT:    %iv.next.prol = add nuw nsw i64 %phi.prol, 1
-; PROLOG-NEXT:    %prol.iter.sub = sub i32 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i32 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i32 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %header.prol, label %header.prol.loopexit.unr-lcssa, !llvm.loop !12
 ; PROLOG:       header.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %phi.unr.ph = phi i64 [ %iv.next.prol, %latch.prol ]
@@ -5019,56 +5019,56 @@ define void @test10(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    br label %loop_header
 ; EPILOG:       loop_header:
 ; EPILOG-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early = icmp ne i64 %iv, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early, label %loop_latch, label %exit1.loopexit
 ; EPILOG:       loop_latch:
 ; EPILOG-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.1, label %loop_latch.1, label %exit1.loopexit
 ; EPILOG:       loop_latch.1:
 ; EPILOG-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i64 %niter.next, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.2 = icmp ne i64 %iv_next.1, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.2, label %loop_latch.2, label %exit1.loopexit
 ; EPILOG:       loop_latch.2:
 ; EPILOG-NEXT:    %iv_next.2 = add nuw nsw i64 %iv_next.1, 1
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i64 %niter.next.1, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.3 = icmp ne i64 %iv_next.2, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.3, label %loop_latch.3, label %exit1.loopexit
 ; EPILOG:       loop_latch.3:
 ; EPILOG-NEXT:    %iv_next.3 = add nuw nsw i64 %iv_next.2, 1
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i64 %niter.next.2, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.4 = icmp ne i64 %iv_next.3, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.4, label %loop_latch.4, label %exit1.loopexit
 ; EPILOG:       loop_latch.4:
 ; EPILOG-NEXT:    %iv_next.4 = add nuw nsw i64 %iv_next.3, 1
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i64 %niter.next.3, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.5 = icmp ne i64 %iv_next.4, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.5, label %loop_latch.5, label %exit1.loopexit
 ; EPILOG:       loop_latch.5:
 ; EPILOG-NEXT:    %iv_next.5 = add nuw nsw i64 %iv_next.4, 1
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i64 %niter.next.4, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.6 = icmp ne i64 %iv_next.5, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.6, label %loop_latch.6, label %exit1.loopexit
 ; EPILOG:       loop_latch.6:
 ; EPILOG-NEXT:    %iv_next.6 = add nuw nsw i64 %iv_next.5, 1
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i64 %niter.next.5, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.7 = icmp ne i64 %iv_next.6, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.7, label %loop_latch.7, label %exit1.loopexit
 ; EPILOG:       loop_latch.7:
 ; EPILOG-NEXT:    %iv_next.7 = add i64 %iv_next.6, 1
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i64 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit2.unr-lcssa.loopexit
 ; EPILOG:       exit1.loopexit:
 ; EPILOG-NEXT:    br label %exit1
@@ -5087,15 +5087,15 @@ define void @test10(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    br label %loop_header.epil
 ; EPILOG:       loop_header.epil:
 ; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %loop_header.epil.preheader ], [ %epil.iter.next, %loop_latch.epil ]
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.epil = icmp ne i64 %iv.epil, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.epil, label %loop_latch.epil, label %exit1.loopexit1
 ; EPILOG:       loop_latch.epil:
 ; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
 ; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit2.epilog-lcssa, !llvm.loop !13
 ; EPILOG:       exit2.epilog-lcssa:
 ; EPILOG-NEXT:    br label %exit2
@@ -5113,20 +5113,20 @@ define void @test10(i64 %trip, i64 %trip2) {
 ; EPILOG-BLOCK-NEXT:    br label %loop_header
 ; EPILOG-BLOCK:       loop_header:
 ; EPILOG-BLOCK-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.1, %loop_latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    call void @bar()
 ; EPILOG-BLOCK-NEXT:    %cmp_early = icmp ne i64 %iv, %trip2
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp_early, label %loop_latch, label %exit1.loopexit
 ; EPILOG-BLOCK:       loop_latch:
 ; EPILOG-BLOCK-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    call void @bar()
 ; EPILOG-BLOCK-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp_early.1, label %loop_latch.1, label %exit1.loopexit
 ; EPILOG-BLOCK:       loop_latch.1:
 ; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.unr-lcssa.loopexit, !llvm.loop !15
 ; EPILOG-BLOCK:       exit1.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit1
@@ -5160,15 +5160,15 @@ define void @test10(i64 %trip, i64 %trip2) {
 ; PROLOG-NEXT:    br label %loop_header.prol
 ; PROLOG:       loop_header.prol:
 ; PROLOG-NEXT:    %iv.prol = phi i64 [ 0, %loop_header.prol.preheader ], [ %iv_next.prol, %loop_latch.prol ]
-; PROLOG-NEXT:    %prol.iter = phi i64 [ %xtraiter, %loop_header.prol.preheader ], [ %prol.iter.sub, %loop_latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ 0, %loop_header.prol.preheader ], [ %prol.iter.next, %loop_latch.prol ]
 ; PROLOG-NEXT:    call void @bar()
 ; PROLOG-NEXT:    %cmp_early.prol = icmp ne i64 %iv.prol, %trip2
 ; PROLOG-NEXT:    br i1 %cmp_early.prol, label %loop_latch.prol, label %exit1.loopexit1
 ; PROLOG:       loop_latch.prol:
 ; PROLOG-NEXT:    %iv_next.prol = add i64 %iv.prol, 1
 ; PROLOG-NEXT:    %cmp.prol = icmp ne i64 %iv_next.prol, %trip
-; PROLOG-NEXT:    %prol.iter.sub = sub i64 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i64 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %loop_header.prol, label %loop_header.prol.loopexit.unr-lcssa, !llvm.loop !13
 ; PROLOG:       loop_header.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.prol, %loop_latch.prol ]
@@ -5312,48 +5312,48 @@ define void @test11(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br label %loop_header
 ; EPILOG:       loop_header:
 ; EPILOG-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch, label %exit1.loopexit
 ; EPILOG:       loop_latch:
 ; EPILOG-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.1, label %exit1.loopexit
 ; EPILOG:       loop_latch.1:
 ; EPILOG-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i64 %niter.next, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.2, label %exit1.loopexit
 ; EPILOG:       loop_latch.2:
 ; EPILOG-NEXT:    %iv_next.2 = add nuw nsw i64 %iv_next.1, 1
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i64 %niter.next.1, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.3, label %exit1.loopexit
 ; EPILOG:       loop_latch.3:
 ; EPILOG-NEXT:    %iv_next.3 = add nuw nsw i64 %iv_next.2, 1
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i64 %niter.next.2, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.4, label %exit1.loopexit
 ; EPILOG:       loop_latch.4:
 ; EPILOG-NEXT:    %iv_next.4 = add nuw nsw i64 %iv_next.3, 1
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i64 %niter.next.3, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.5, label %exit1.loopexit
 ; EPILOG:       loop_latch.5:
 ; EPILOG-NEXT:    %iv_next.5 = add nuw nsw i64 %iv_next.4, 1
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i64 %niter.next.4, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.6, label %exit1.loopexit
 ; EPILOG:       loop_latch.6:
 ; EPILOG-NEXT:    %iv_next.6 = add nuw nsw i64 %iv_next.5, 1
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i64 %niter.next.5, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.7, label %exit1.loopexit
 ; EPILOG:       loop_latch.7:
 ; EPILOG-NEXT:    %iv_next.7 = add i64 %iv_next.6, 1
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i64 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit2.unr-lcssa.loopexit
 ; EPILOG:       exit1.loopexit:
 ; EPILOG-NEXT:    br label %exit1
@@ -5372,14 +5372,14 @@ define void @test11(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br label %loop_header.epil
 ; EPILOG:       loop_header.epil:
 ; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %loop_header.epil.preheader ], [ %epil.iter.next, %loop_latch.epil ]
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %exit1.loopexit1
 ; EPILOG:       loop_latch.epil:
 ; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
 ; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit2.epilog-lcssa, !llvm.loop !14
 ; EPILOG:       exit2.epilog-lcssa:
 ; EPILOG-NEXT:    br label %exit2
@@ -5397,18 +5397,18 @@ define void @test11(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br label %loop_header
 ; EPILOG-BLOCK:       loop_header:
 ; EPILOG-BLOCK-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.1, %loop_latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    call void @bar()
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch, label %exit1.loopexit
 ; EPILOG-BLOCK:       loop_latch:
 ; EPILOG-BLOCK-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    call void @bar()
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.1, label %exit1.loopexit
 ; EPILOG-BLOCK:       loop_latch.1:
 ; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.unr-lcssa.loopexit, !llvm.loop !16
 ; EPILOG-BLOCK:       exit1.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit1
@@ -5439,14 +5439,14 @@ define void @test11(i64 %trip, i1 %cond) {
 ; PROLOG-NEXT:    br label %loop_header.prol
 ; PROLOG:       loop_header.prol:
 ; PROLOG-NEXT:    %iv.prol = phi i64 [ 0, %loop_header.prol.preheader ], [ %iv_next.prol, %loop_latch.prol ]
-; PROLOG-NEXT:    %prol.iter = phi i64 [ %xtraiter, %loop_header.prol.preheader ], [ %prol.iter.sub, %loop_latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ 0, %loop_header.prol.preheader ], [ %prol.iter.next, %loop_latch.prol ]
 ; PROLOG-NEXT:    call void @bar()
 ; PROLOG-NEXT:    br i1 %cond, label %loop_latch.prol, label %exit1.loopexit1
 ; PROLOG:       loop_latch.prol:
 ; PROLOG-NEXT:    %iv_next.prol = add i64 %iv.prol, 1
 ; PROLOG-NEXT:    %cmp.prol = icmp ne i64 %iv_next.prol, %trip
-; PROLOG-NEXT:    %prol.iter.sub = sub i64 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i64 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %loop_header.prol, label %loop_header.prol.loopexit.unr-lcssa, !llvm.loop !14
 ; PROLOG:       loop_header.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.prol, %loop_latch.prol ]
@@ -5578,7 +5578,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-NEXT:    br label %loop_header
 ; EPILOG:       loop_header:
 ; EPILOG-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early = icmp ne i64 %iv, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early, label %loop_exiting_bb2, label %exit1.epilog-lcssa.loopexit
@@ -5586,7 +5586,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch:
 ; EPILOG-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit
@@ -5594,7 +5594,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.1:
 ; EPILOG-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i64 %niter.next, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.2 = icmp ne i64 %iv_next.1, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.2, label %loop_exiting_bb2.2, label %exit1.epilog-lcssa.loopexit
@@ -5602,7 +5602,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.2, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.2:
 ; EPILOG-NEXT:    %iv_next.2 = add nuw nsw i64 %iv_next.1, 1
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i64 %niter.next.1, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.3 = icmp ne i64 %iv_next.2, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.3, label %loop_exiting_bb2.3, label %exit1.epilog-lcssa.loopexit
@@ -5610,7 +5610,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.3, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.3:
 ; EPILOG-NEXT:    %iv_next.3 = add nuw nsw i64 %iv_next.2, 1
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i64 %niter.next.2, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.4 = icmp ne i64 %iv_next.3, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.4, label %loop_exiting_bb2.4, label %exit1.epilog-lcssa.loopexit
@@ -5618,7 +5618,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.4, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.4:
 ; EPILOG-NEXT:    %iv_next.4 = add nuw nsw i64 %iv_next.3, 1
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i64 %niter.next.3, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.5 = icmp ne i64 %iv_next.4, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.5, label %loop_exiting_bb2.5, label %exit1.epilog-lcssa.loopexit
@@ -5626,7 +5626,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.5, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.5:
 ; EPILOG-NEXT:    %iv_next.5 = add nuw nsw i64 %iv_next.4, 1
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i64 %niter.next.4, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.6 = icmp ne i64 %iv_next.5, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.6, label %loop_exiting_bb2.6, label %exit1.epilog-lcssa.loopexit
@@ -5634,7 +5634,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.6, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.6:
 ; EPILOG-NEXT:    %iv_next.6 = add nuw nsw i64 %iv_next.5, 1
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i64 %niter.next.5, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.7 = icmp ne i64 %iv_next.6, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.7, label %loop_exiting_bb2.7, label %exit1.epilog-lcssa.loopexit
@@ -5642,8 +5642,8 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.7, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.7:
 ; EPILOG-NEXT:    %iv_next.7 = add i64 %iv_next.6, 1
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i64 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit1.unr-lcssa.loopexit
 ; EPILOG:       exit1.unr-lcssa.loopexit:
 ; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
@@ -5656,7 +5656,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-NEXT:    br label %loop_header.epil
 ; EPILOG:       loop_header.epil:
 ; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %loop_header.epil.preheader ], [ %epil.iter.next, %loop_latch.epil ]
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.epil = icmp ne i64 %iv.epil, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.epil, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa.loopexit1
@@ -5665,8 +5665,8 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG:       loop_latch.epil:
 ; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
 ; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit1.epilog-lcssa.loopexit1, !llvm.loop !15
 ; EPILOG:       exit1.epilog-lcssa.loopexit:
 ; EPILOG-NEXT:    br label %exit1.epilog-lcssa
@@ -5688,7 +5688,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br label %loop_header
 ; EPILOG-BLOCK:       loop_header:
 ; EPILOG-BLOCK-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.1, %loop_latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    call void @bar()
 ; EPILOG-BLOCK-NEXT:    %cmp_early = icmp ne i64 %iv, %trip2
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp_early, label %loop_exiting_bb2, label %exit1.epilog-lcssa.loopexit
@@ -5696,7 +5696,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch, label %exit1.epilog-lcssa.loopexit
 ; EPILOG-BLOCK:       loop_latch:
 ; EPILOG-BLOCK-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    call void @bar()
 ; EPILOG-BLOCK-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit
@@ -5704,8 +5704,8 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit
 ; EPILOG-BLOCK:       loop_latch.1:
 ; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit1.unr-lcssa.loopexit, !llvm.loop !17
 ; EPILOG-BLOCK:       exit1.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.1, %loop_latch.1 ]
@@ -5741,7 +5741,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; PROLOG-NEXT:    br label %loop_header.prol
 ; PROLOG:       loop_header.prol:
 ; PROLOG-NEXT:    %iv.prol = phi i64 [ 0, %loop_header.prol.preheader ], [ %iv_next.prol, %loop_latch.prol ]
-; PROLOG-NEXT:    %prol.iter = phi i64 [ %xtraiter, %loop_header.prol.preheader ], [ %prol.iter.sub, %loop_latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ 0, %loop_header.prol.preheader ], [ %prol.iter.next, %loop_latch.prol ]
 ; PROLOG-NEXT:    call void @bar()
 ; PROLOG-NEXT:    %cmp_early.prol = icmp ne i64 %iv.prol, %trip2
 ; PROLOG-NEXT:    br i1 %cmp_early.prol, label %loop_exiting_bb2.prol, label %exit1.unr-lcssa.loopexit1
@@ -5750,8 +5750,8 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; PROLOG:       loop_latch.prol:
 ; PROLOG-NEXT:    %iv_next.prol = add i64 %iv.prol, 1
 ; PROLOG-NEXT:    %cmp.prol = icmp ne i64 %iv_next.prol, %trip
-; PROLOG-NEXT:    %prol.iter.sub = sub i64 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i64 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %loop_header.prol, label %loop_header.prol.loopexit.unr-lcssa, !llvm.loop !15
 ; PROLOG:       loop_header.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.prol, %loop_latch.prol ]
@@ -5914,7 +5914,7 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    br label %loop_header
 ; EPILOG:       loop_header:
 ; EPILOG-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early = icmp ne i64 %iv, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early, label %loop_exiting_bb2, label %exit1.epilog-lcssa.loopexit
@@ -5923,7 +5923,7 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    br i1 %unknown, label %loop_latch, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch:
 ; EPILOG-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit
@@ -5932,7 +5932,7 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    br i1 %unknown.1, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.1:
 ; EPILOG-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i64 %niter.next, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.2 = icmp ne i64 %iv_next.1, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.2, label %loop_exiting_bb2.2, label %exit1.epilog-lcssa.loopexit
@@ -5941,7 +5941,7 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    br i1 %unknown.2, label %loop_latch.2, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.2:
 ; EPILOG-NEXT:    %iv_next.2 = add nuw nsw i64 %iv_next.1, 1
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i64 %niter.next.1, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.3 = icmp ne i64 %iv_next.2, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.3, label %loop_exiting_bb2.3, label %exit1.epilog-lcssa.loopexit
@@ -5950,7 +5950,7 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    br i1 %unknown.3, label %loop_latch.3, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.3:
 ; EPILOG-NEXT:    %iv_next.3 = add nuw nsw i64 %iv_next.2, 1
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i64 %niter.next.2, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.4 = icmp ne i64 %iv_next.3, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.4, label %loop_exiting_bb2.4, label %exit1.epilog-lcssa.loopexit
@@ -5959,7 +5959,7 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    br i1 %unknown.4, label %loop_latch.4, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.4:
 ; EPILOG-NEXT:    %iv_next.4 = add nuw nsw i64 %iv_next.3, 1
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i64 %niter.next.3, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.5 = icmp ne i64 %iv_next.4, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.5, label %loop_exiting_bb2.5, label %exit1.epilog-lcssa.loopexit
@@ -5968,7 +5968,7 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    br i1 %unknown.5, label %loop_latch.5, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.5:
 ; EPILOG-NEXT:    %iv_next.5 = add nuw nsw i64 %iv_next.4, 1
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i64 %niter.next.4, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.6 = icmp ne i64 %iv_next.5, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.6, label %loop_exiting_bb2.6, label %exit1.epilog-lcssa.loopexit
@@ -5977,7 +5977,7 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    br i1 %unknown.6, label %loop_latch.6, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.6:
 ; EPILOG-NEXT:    %iv_next.6 = add nuw nsw i64 %iv_next.5, 1
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i64 %niter.next.5, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.7 = icmp ne i64 %iv_next.6, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.7, label %loop_exiting_bb2.7, label %exit1.epilog-lcssa.loopexit
@@ -5986,8 +5986,8 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    br i1 %unknown.7, label %loop_latch.7, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.7:
 ; EPILOG-NEXT:    %iv_next.7 = add i64 %iv_next.6, 1
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i64 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit1.unr-lcssa.loopexit
 ; EPILOG:       exit1.unr-lcssa.loopexit:
 ; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
@@ -6000,7 +6000,7 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    br label %loop_header.epil
 ; EPILOG:       loop_header.epil:
 ; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %loop_header.epil.preheader ], [ %epil.iter.next, %loop_latch.epil ]
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.epil = icmp ne i64 %iv.epil, %trip2
 ; EPILOG-NEXT:    br i1 %cmp_early.epil, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa.loopexit1
@@ -6010,8 +6010,8 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG:       loop_latch.epil:
 ; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
 ; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit1.epilog-lcssa.loopexit1, !llvm.loop !16
 ; EPILOG:       exit1.epilog-lcssa.loopexit:
 ; EPILOG-NEXT:    br label %exit1.epilog-lcssa
@@ -6033,7 +6033,7 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-BLOCK-NEXT:    br label %loop_header
 ; EPILOG-BLOCK:       loop_header:
 ; EPILOG-BLOCK-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.1, %loop_latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    call void @bar()
 ; EPILOG-BLOCK-NEXT:    %cmp_early = icmp ne i64 %iv, %trip2
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp_early, label %loop_exiting_bb2, label %exit1.epilog-lcssa.loopexit
@@ -6042,7 +6042,7 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-BLOCK-NEXT:    br i1 %unknown, label %loop_latch, label %exit1.epilog-lcssa.loopexit
 ; EPILOG-BLOCK:       loop_latch:
 ; EPILOG-BLOCK-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    call void @bar()
 ; EPILOG-BLOCK-NEXT:    %cmp_early.1 = icmp ne i64 %iv_next, %trip2
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp_early.1, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit
@@ -6051,8 +6051,8 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-BLOCK-NEXT:    br i1 %unknown.1, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit
 ; EPILOG-BLOCK:       loop_latch.1:
 ; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit1.unr-lcssa.loopexit, !llvm.loop !18
 ; EPILOG-BLOCK:       exit1.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.1, %loop_latch.1 ]
@@ -6089,7 +6089,7 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; PROLOG-NEXT:    br label %loop_header.prol
 ; PROLOG:       loop_header.prol:
 ; PROLOG-NEXT:    %iv.prol = phi i64 [ 0, %loop_header.prol.preheader ], [ %iv_next.prol, %loop_latch.prol ]
-; PROLOG-NEXT:    %prol.iter = phi i64 [ %xtraiter, %loop_header.prol.preheader ], [ %prol.iter.sub, %loop_latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ 0, %loop_header.prol.preheader ], [ %prol.iter.next, %loop_latch.prol ]
 ; PROLOG-NEXT:    call void @bar()
 ; PROLOG-NEXT:    %cmp_early.prol = icmp ne i64 %iv.prol, %trip2
 ; PROLOG-NEXT:    br i1 %cmp_early.prol, label %loop_exiting_bb2.prol, label %exit1.unr-lcssa.loopexit1
@@ -6099,8 +6099,8 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; PROLOG:       loop_latch.prol:
 ; PROLOG-NEXT:    %iv_next.prol = add i64 %iv.prol, 1
 ; PROLOG-NEXT:    %cmp.prol = icmp ne i64 %iv_next.prol, %trip
-; PROLOG-NEXT:    %prol.iter.sub = sub i64 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i64 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %loop_header.prol, label %loop_header.prol.loopexit.unr-lcssa, !llvm.loop !16
 ; PROLOG:       loop_header.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.prol, %loop_latch.prol ]
@@ -6273,7 +6273,7 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br label %loop_header
 ; EPILOG:       loop_header:
 ; EPILOG-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_exiting_bb2, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_exiting_bb2:
@@ -6281,7 +6281,7 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %unknown, label %loop_latch, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch:
 ; EPILOG-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
-; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_exiting_bb2.1:
@@ -6289,7 +6289,7 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %unknown.1, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.1:
 ; EPILOG-NEXT:    %iv_next.1 = add nuw nsw i64 %iv_next, 1
-; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %niter.next.1 = add nuw nsw i64 %niter.next, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_exiting_bb2.2, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_exiting_bb2.2:
@@ -6297,7 +6297,7 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %unknown.2, label %loop_latch.2, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.2:
 ; EPILOG-NEXT:    %iv_next.2 = add nuw nsw i64 %iv_next.1, 1
-; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %niter.next.2 = add nuw nsw i64 %niter.next.1, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_exiting_bb2.3, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_exiting_bb2.3:
@@ -6305,7 +6305,7 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %unknown.3, label %loop_latch.3, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.3:
 ; EPILOG-NEXT:    %iv_next.3 = add nuw nsw i64 %iv_next.2, 1
-; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %niter.next.3 = add nuw nsw i64 %niter.next.2, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_exiting_bb2.4, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_exiting_bb2.4:
@@ -6313,7 +6313,7 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %unknown.4, label %loop_latch.4, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.4:
 ; EPILOG-NEXT:    %iv_next.4 = add nuw nsw i64 %iv_next.3, 1
-; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %niter.next.4 = add nuw nsw i64 %niter.next.3, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_exiting_bb2.5, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_exiting_bb2.5:
@@ -6321,7 +6321,7 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %unknown.5, label %loop_latch.5, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.5:
 ; EPILOG-NEXT:    %iv_next.5 = add nuw nsw i64 %iv_next.4, 1
-; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %niter.next.5 = add nuw nsw i64 %niter.next.4, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_exiting_bb2.6, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_exiting_bb2.6:
@@ -6329,7 +6329,7 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %unknown.6, label %loop_latch.6, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.6:
 ; EPILOG-NEXT:    %iv_next.6 = add nuw nsw i64 %iv_next.5, 1
-; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %niter.next.6 = add nuw nsw i64 %niter.next.5, 1
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_exiting_bb2.7, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_exiting_bb2.7:
@@ -6337,8 +6337,8 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br i1 %unknown.7, label %loop_latch.7, label %exit1.epilog-lcssa.loopexit
 ; EPILOG:       loop_latch.7:
 ; EPILOG-NEXT:    %iv_next.7 = add i64 %iv_next.6, 1
-; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
-; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    %niter.next.7 = add i64 %niter.next.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
 ; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit1.unr-lcssa.loopexit
 ; EPILOG:       exit1.unr-lcssa.loopexit:
 ; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
@@ -6351,7 +6351,7 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br label %loop_header.epil
 ; EPILOG:       loop_header.epil:
 ; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %loop_header.epil.preheader ], [ %epil.iter.sub, %loop_latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %loop_header.epil.preheader ], [ %epil.iter.next, %loop_latch.epil ]
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    br i1 %cond, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa.loopexit1
 ; EPILOG:       loop_exiting_bb2.epil:
@@ -6360,8 +6360,8 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG:       loop_latch.epil:
 ; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
 ; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
-; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
 ; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit1.epilog-lcssa.loopexit1, !llvm.loop !17
 ; EPILOG:       exit1.epilog-lcssa.loopexit:
 ; EPILOG-NEXT:    br label %exit1.epilog-lcssa
@@ -6383,7 +6383,7 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br label %loop_header
 ; EPILOG-BLOCK:       loop_header:
 ; EPILOG-BLOCK-NEXT:    %iv = phi i64 [ 0, %entry.new ], [ %iv_next.1, %loop_latch.1 ]
-; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.1, %loop_latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ 0, %entry.new ], [ %niter.next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    call void @bar()
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_exiting_bb2, label %exit1.epilog-lcssa.loopexit
 ; EPILOG-BLOCK:       loop_exiting_bb2:
@@ -6391,7 +6391,7 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br i1 %unknown, label %loop_latch, label %exit1.epilog-lcssa.loopexit
 ; EPILOG-BLOCK:       loop_latch:
 ; EPILOG-BLOCK-NEXT:    %iv_next = add nuw nsw i64 %iv, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %niter.next = add nuw nsw i64 %niter, 1
 ; EPILOG-BLOCK-NEXT:    call void @bar()
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_exiting_bb2.1, label %exit1.epilog-lcssa.loopexit
 ; EPILOG-BLOCK:       loop_exiting_bb2.1:
@@ -6399,8 +6399,8 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    br i1 %unknown.1, label %loop_latch.1, label %exit1.epilog-lcssa.loopexit
 ; EPILOG-BLOCK:       loop_latch.1:
 ; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv_next, 1
-; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
-; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter.next, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
 ; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit1.unr-lcssa.loopexit, !llvm.loop !19
 ; EPILOG-BLOCK:       exit1.unr-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit1.unr-lcssa
@@ -6434,7 +6434,7 @@ define void @test14(i64 %trip, i1 %cond) {
 ; PROLOG-NEXT:    br label %loop_header.prol
 ; PROLOG:       loop_header.prol:
 ; PROLOG-NEXT:    %iv.prol = phi i64 [ 0, %loop_header.prol.preheader ], [ %iv_next.prol, %loop_latch.prol ]
-; PROLOG-NEXT:    %prol.iter = phi i64 [ %xtraiter, %loop_header.prol.preheader ], [ %prol.iter.sub, %loop_latch.prol ]
+; PROLOG-NEXT:    %prol.iter = phi i64 [ 0, %loop_header.prol.preheader ], [ %prol.iter.next, %loop_latch.prol ]
 ; PROLOG-NEXT:    call void @bar()
 ; PROLOG-NEXT:    br i1 %cond, label %loop_exiting_bb2.prol, label %exit1.unr-lcssa.loopexit1
 ; PROLOG:       loop_exiting_bb2.prol:
@@ -6443,8 +6443,8 @@ define void @test14(i64 %trip, i1 %cond) {
 ; PROLOG:       loop_latch.prol:
 ; PROLOG-NEXT:    %iv_next.prol = add i64 %iv.prol, 1
 ; PROLOG-NEXT:    %cmp.prol = icmp ne i64 %iv_next.prol, %trip
-; PROLOG-NEXT:    %prol.iter.sub = sub i64 %prol.iter, 1
-; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.sub, 0
+; PROLOG-NEXT:    %prol.iter.next = add i64 %prol.iter, 1
+; PROLOG-NEXT:    %prol.iter.cmp = icmp ne i64 %prol.iter.next, %xtraiter
 ; PROLOG-NEXT:    br i1 %prol.iter.cmp, label %loop_header.prol, label %loop_header.prol.loopexit.unr-lcssa, !llvm.loop !17
 ; PROLOG:       loop_header.prol.loopexit.unr-lcssa:
 ; PROLOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.prol, %loop_latch.prol ]

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
index 98316905e1fe..3deda2f64986 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -32,8 +32,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; EPILOG: for.body.epil:
 ; EPILOG: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ],  [ %indvars.iv.unr, %for.body.epil.preheader ]
-; EPILOG:  %epil.iter.sub = sub i32 %epil.iter, 1
-; EPILOG:  %epil.iter.cmp = icmp ne i32 %epil.iter.sub, 0
+; EPILOG:  %epil.iter.next = add i32 %epil.iter, 1
+; EPILOG:  %epil.iter.cmp = icmp ne i32 %epil.iter.next, %xtraiter
 ; EPILOG:  br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop !0
 
 ; NOEPILOG: for.body:
@@ -41,8 +41,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; PROLOG: for.body.prol:
 ; PROLOG: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
-; PROLOG:  %prol.iter.sub = sub i32 %prol.iter, 1
-; PROLOG:  %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
+; PROLOG:  %prol.iter.next = add i32 %prol.iter, 1
+; PROLOG:  %prol.iter.cmp = icmp ne i32 %prol.iter.next, %xtraiter
 ; PROLOG:  br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !llvm.loop !0
 
 ; NOPROLOG: for.body:

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
index 28cd885dbc44..88ebdf93d69e 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
@@ -76,28 +76,28 @@ define i3 @test(i3* %a, i3 %n) {
 ; UNROLL-4:       for.body:
 ; UNROLL-4-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY]] ]
 ; UNROLL-4-NEXT:    [[SUM_02:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[ADD_3:%.*]], [[FOR_BODY]] ]
-; UNROLL-4-NEXT:    [[NITER:%.*]] = phi i3 [ [[UNROLL_ITER]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_BODY]] ]
+; UNROLL-4-NEXT:    [[NITER:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY]] ]
 ; UNROLL-4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i3, i3* [[A:%.*]], i64 [[INDVARS_IV]]
 ; UNROLL-4-NEXT:    [[TMP2:%.*]] = load i3, i3* [[ARRAYIDX]], align 1
 ; UNROLL-4-NEXT:    [[ADD:%.*]] = add nsw i3 [[TMP2]], [[SUM_02]]
 ; UNROLL-4-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; UNROLL-4-NEXT:    [[NITER_NSUB:%.*]] = sub i3 [[NITER]], 1
+; UNROLL-4-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i3 [[NITER]], 1
 ; UNROLL-4-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT]]
 ; UNROLL-4-NEXT:    [[TMP3:%.*]] = load i3, i3* [[ARRAYIDX_1]], align 1
 ; UNROLL-4-NEXT:    [[ADD_1:%.*]] = add nsw i3 [[TMP3]], [[ADD]]
 ; UNROLL-4-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
-; UNROLL-4-NEXT:    [[NITER_NSUB_1:%.*]] = sub i3 [[NITER_NSUB]], 1
+; UNROLL-4-NEXT:    [[NITER_NEXT_1:%.*]] = add nuw nsw i3 [[NITER_NEXT]], 1
 ; UNROLL-4-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_1]]
 ; UNROLL-4-NEXT:    [[TMP4:%.*]] = load i3, i3* [[ARRAYIDX_2]], align 1
 ; UNROLL-4-NEXT:    [[ADD_2:%.*]] = add nsw i3 [[TMP4]], [[ADD_1]]
 ; UNROLL-4-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
-; UNROLL-4-NEXT:    [[NITER_NSUB_2:%.*]] = sub i3 [[NITER_NSUB_1]], 1
+; UNROLL-4-NEXT:    [[NITER_NEXT_2:%.*]] = add nuw nsw i3 [[NITER_NEXT_1]], 1
 ; UNROLL-4-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_2]]
 ; UNROLL-4-NEXT:    [[TMP5:%.*]] = load i3, i3* [[ARRAYIDX_3]], align 1
 ; UNROLL-4-NEXT:    [[ADD_3]] = add nsw i3 [[TMP5]], [[ADD_2]]
 ; UNROLL-4-NEXT:    [[INDVARS_IV_NEXT_3]] = add i64 [[INDVARS_IV_NEXT_2]], 1
-; UNROLL-4-NEXT:    [[NITER_NSUB_3]] = sub i3 [[NITER_NSUB_2]], 1
-; UNROLL-4-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i3 [[NITER_NSUB_3]], 0
+; UNROLL-4-NEXT:    [[NITER_NEXT_3]] = add i3 [[NITER_NEXT_2]], 1
+; UNROLL-4-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i3 [[NITER_NEXT_3]], [[UNROLL_ITER]]
 ; UNROLL-4-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; UNROLL-4:       for.end.loopexit.unr-lcssa.loopexit:
 ; UNROLL-4-NEXT:    [[ADD_LCSSA_PH_PH:%.*]] = phi i3 [ [[ADD_3]], [[FOR_BODY]] ]
@@ -115,15 +115,15 @@ define i3 @test(i3* %a, i3 %n) {
 ; UNROLL-4:       for.body.epil:
 ; UNROLL-4-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[INDVARS_IV_UNR]], [[FOR_BODY_EPIL_PREHEADER]] ]
 ; UNROLL-4-NEXT:    [[SUM_02_EPIL:%.*]] = phi i3 [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[SUM_02_UNR]], [[FOR_BODY_EPIL_PREHEADER]] ]
-; UNROLL-4-NEXT:    [[EPIL_ITER:%.*]] = phi i3 [ [[XTRAITER]], [[FOR_BODY_EPIL_PREHEADER]] ], [ [[EPIL_ITER_SUB:%.*]], [[FOR_BODY_EPIL]] ]
+; UNROLL-4-NEXT:    [[EPIL_ITER:%.*]] = phi i3 [ 0, [[FOR_BODY_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[FOR_BODY_EPIL]] ]
 ; UNROLL-4-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_EPIL]]
 ; UNROLL-4-NEXT:    [[TMP6:%.*]] = load i3, i3* [[ARRAYIDX_EPIL]], align 1
 ; UNROLL-4-NEXT:    [[ADD_EPIL]] = add nsw i3 [[TMP6]], [[SUM_02_EPIL]]
 ; UNROLL-4-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
 ; UNROLL-4-NEXT:    [[LFTR_WIDEIV_EPIL:%.*]] = trunc i64 [[INDVARS_IV_NEXT_EPIL]] to i3
 ; UNROLL-4-NEXT:    [[EXITCOND_EPIL:%.*]] = icmp eq i3 [[LFTR_WIDEIV_EPIL]], [[N]]
-; UNROLL-4-NEXT:    [[EPIL_ITER_SUB]] = sub i3 [[EPIL_ITER]], 1
-; UNROLL-4-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i3 [[EPIL_ITER_SUB]], 0
+; UNROLL-4-NEXT:    [[EPIL_ITER_NEXT]] = add i3 [[EPIL_ITER]], 1
+; UNROLL-4-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i3 [[EPIL_ITER_NEXT]], [[XTRAITER]]
 ; UNROLL-4-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_BODY_EPIL]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
 ; UNROLL-4:       for.end.loopexit.epilog-lcssa:
 ; UNROLL-4-NEXT:    [[ADD_LCSSA_PH1:%.*]] = phi i3 [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ]

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
index 89aa49dd71a0..370e9a4c9e63 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
@@ -19,7 +19,7 @@ define i32 @test1(i32* nocapture %a, i64 %n) {
 ; CHECK:       header:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ]
 ; CHECK-NEXT:    [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[ENTRY_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[LATCH_7]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[LATCH_7]] ]
 ; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK:%.*]]
 ; CHECK:       for.exiting_block:
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[N]], 42
@@ -92,8 +92,8 @@ define i32 @test1(i32* nocapture %a, i64 %n) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
 ; CHECK-NEXT:    [[ADD_7]] = add nsw i32 [[TMP9]], [[ADD_6]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
-; CHECK-NEXT:    [[NITER_NSUB_7]] = add i64 [[NITER]], -8
-; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0
+; CHECK-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
+; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
 ; CHECK:       latchexit.unr-lcssa.loopexit:
 ; CHECK-NEXT:    br label [[LATCHEXIT_UNR_LCSSA]]
@@ -108,7 +108,7 @@ define i32 @test1(i32* nocapture %a, i64 %n) {
 ; CHECK:       header.epil:
 ; CHECK-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
 ; CHECK-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
-; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[LATCH_EPIL]] ], [ [[XTRAITER]], [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ], [ 0, [[HEADER_EPIL_PREHEADER]] ]
 ; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
 ; CHECK:       for.exiting_block.epil:
 ; CHECK-NEXT:    [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42
@@ -118,8 +118,8 @@ define i32 @test1(i32* nocapture %a, i64 %n) {
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4
 ; CHECK-NEXT:    [[ADD_EPIL]] = add nsw i32 [[TMP10]], [[SUM_02_EPIL]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
-; CHECK-NEXT:    [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1
-; CHECK-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0
+; CHECK-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; CHECK-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], label [[HEADER_EPIL]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       latchexit.epilog-lcssa:
 ; CHECK-NEXT:    br label [[LATCHEXIT]]

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
index 888656387dc3..bcc3020ce6fd 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
@@ -97,22 +97,22 @@ define dso_local void @cannotProveDivisibleTC(i8* noalias nocapture %a, i8* noal
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_011:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NSUB_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[B:%.*]], i32 [[I_011]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[TMP2]], 3
 ; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 [[I_011]]
 ; CHECK-NEXT:    store i8 [[ADD]], i8* [[ARRAYIDX4]], align 1
 ; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i32 [[I_011]], 1
-; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1
+; CHECK-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i32 [[NITER]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 [[INC]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i8, i8* [[ARRAYIDX_1]], align 1
 ; CHECK-NEXT:    [[ADD_1:%.*]] = add i8 [[TMP3]], 3
 ; CHECK-NEXT:    [[ARRAYIDX4_1:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 [[INC]]
 ; CHECK-NEXT:    store i8 [[ADD_1]], i8* [[ARRAYIDX4_1]], align 1
 ; CHECK-NEXT:    [[INC_1]] = add nuw nsw i32 [[INC]], 1
-; CHECK-NEXT:    [[NITER_NSUB_1]] = sub i32 [[NITER_NSUB]], 1
-; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp ne i32 [[NITER_NSUB_1]], 0
+; CHECK-NEXT:    [[NITER_NEXT_1]] = add i32 [[NITER_NEXT]], 1
+; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp ne i32 [[NITER_NEXT_1]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       exit.loopexit.unr-lcssa.loopexit:
 ; CHECK-NEXT:    [[I_011_UNR_PH:%.*]] = phi i32 [ [[INC_1]], [[FOR_BODY]] ]

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
index f0cbf13fba80..eb963303a361 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
@@ -66,7 +66,7 @@ define i32 @unroll(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[INDVARS_IV_NEXT_3]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[C_010:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
@@ -95,8 +95,8 @@ define i32 @unroll(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %
 ; CHECK-NEXT:    [[MUL_3:%.*]] = mul nsw i32 [[TMP15]], [[TMP14]]
 ; CHECK-NEXT:    [[ADD_3]] = add nsw i32 [[MUL_3]], [[ADD_2]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
-; CHECK-NEXT:    [[NITER_NSUB_3]] = add i64 [[NITER]], -4
-; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NSUB_3]], 0
+; CHECK-NEXT:    [[NITER_NEXT_3]] = add i64 [[NITER]], 4
+; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
index bdd1f4d4b921..64e1774672b0 100644
--- a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
+++ b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
@@ -22,15 +22,15 @@ define void @test1(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    br label [[FOR_OUTER:%.*]]
 ; CHECK:       for.outer:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[ADD8_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_OUTER_PREHEADER_NEW]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_LATCH]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ]
 ; CHECK-NEXT:    [[ADD8:%.*]] = add nuw nsw i32 [[I]], 1
-; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1
+; CHECK-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i32 [[NITER]], 1
 ; CHECK-NEXT:    [[ADD8_1:%.*]] = add nuw nsw i32 [[ADD8]], 1
-; CHECK-NEXT:    [[NITER_NSUB_1:%.*]] = sub i32 [[NITER_NSUB]], 1
+; CHECK-NEXT:    [[NITER_NEXT_1:%.*]] = add nuw nsw i32 [[NITER_NEXT]], 1
 ; CHECK-NEXT:    [[ADD8_2:%.*]] = add nuw nsw i32 [[ADD8_1]], 1
-; CHECK-NEXT:    [[NITER_NSUB_2:%.*]] = sub i32 [[NITER_NSUB_1]], 1
+; CHECK-NEXT:    [[NITER_NEXT_2:%.*]] = add nuw nsw i32 [[NITER_NEXT_1]], 1
 ; CHECK-NEXT:    [[ADD8_3]] = add nuw i32 [[ADD8_2]], 1
-; CHECK-NEXT:    [[NITER_NSUB_3]] = sub i32 [[NITER_NSUB_2]], 1
+; CHECK-NEXT:    [[NITER_NEXT_3]] = add i32 [[NITER_NEXT_2]], 1
 ; CHECK-NEXT:    br label [[FOR_INNER:%.*]]
 ; CHECK:       for.inner:
 ; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ]
@@ -72,7 +72,7 @@ define void @test1(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_2]], i32* [[ARRAYIDX6_2]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_2]]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_3]], i32* [[ARRAYIDX6_3]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NSUB_3]], 0
+; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       for.end.loopexit.unr-lcssa.loopexit:
 ; CHECK-NEXT:    [[I_UNR_PH:%.*]] = phi i32 [ [[ADD8_3]], [[FOR_LATCH]] ]
@@ -99,8 +99,7 @@ define void @test1(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_UNR]]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL]], i32* [[ARRAYIDX6_EPIL]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD8_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1
-; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
+; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]]
 ; CHECK:       for.outer.epil.1:
 ; CHECK-NEXT:    br label [[FOR_INNER_EPIL_1:%.*]]
@@ -118,8 +117,7 @@ define void @test1(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_EPIL]]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL_1]], i32* [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD8_EPIL_1:%.*]] = add nuw i32 [[ADD8_EPIL]], 1
-; CHECK-NEXT:    [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0
+; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
 ; CHECK:       for.outer.epil.2:
 ; CHECK-NEXT:    br label [[FOR_INNER_EPIL_2:%.*]]
@@ -202,23 +200,23 @@ define void @test2(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    br label [[FOR_OUTER:%.*]]
 ; CHECK:       for.outer:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[ADD9_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_OUTER_PREHEADER_NEW]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_LATCH]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i32 [[I]], 1
-; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1
+; CHECK-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i32 [[NITER]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD9]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD9_1:%.*]] = add nuw nsw i32 [[ADD9]], 1
-; CHECK-NEXT:    [[NITER_NSUB_1:%.*]] = sub i32 [[NITER_NSUB]], 1
+; CHECK-NEXT:    [[NITER_NEXT_1:%.*]] = add nuw nsw i32 [[NITER_NEXT]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD9_1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD9_2:%.*]] = add nuw nsw i32 [[ADD9_1]], 1
-; CHECK-NEXT:    [[NITER_NSUB_2:%.*]] = sub i32 [[NITER_NSUB_1]], 1
+; CHECK-NEXT:    [[NITER_NEXT_2:%.*]] = add nuw nsw i32 [[NITER_NEXT_1]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD9_2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD9_3]] = add nuw i32 [[ADD9_2]], 1
-; CHECK-NEXT:    [[NITER_NSUB_3]] = sub i32 [[NITER_NSUB_2]], 1
+; CHECK-NEXT:    [[NITER_NEXT_3]] = add i32 [[NITER_NEXT_2]], 1
 ; CHECK-NEXT:    br label [[FOR_INNER:%.*]]
 ; CHECK:       for.inner:
 ; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ]
@@ -256,7 +254,7 @@ define void @test2(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_1]], i32* [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_2]], i32* [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_3]], i32* [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NSUB_3]], 0
+; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_END10_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       for.end10.loopexit.unr-lcssa.loopexit:
 ; CHECK-NEXT:    [[I_UNR_PH:%.*]] = phi i32 [ [[ADD9_3]], [[FOR_LATCH]] ]
@@ -284,8 +282,7 @@ define void @test2(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_INNER_EPIL]] ]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL]], i32* [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD9_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1
-; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
+; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END10_LOOPEXIT_EPILOG_LCSSA:%.*]]
 ; CHECK:       for.outer.epil.1:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD9_EPIL]]
@@ -304,8 +301,7 @@ define void @test2(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], [[FOR_INNER_EPIL_1]] ]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL_1]], i32* [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD9_EPIL_1:%.*]] = add nuw i32 [[ADD9_EPIL]], 1
-; CHECK-NEXT:    [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0
+; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_END10_LOOPEXIT_EPILOG_LCSSA]]
 ; CHECK:       for.outer.epil.2:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD9_EPIL_1]]
@@ -626,15 +622,15 @@ define i32 @test6() #0 {
 ; CHECK-NEXT:    br label [[FOR_OUTER:%.*]]
 ; CHECK:       for.outer:
 ; CHECK-NEXT:    [[INC5_SINK9:%.*]] = phi i32 [ 2, [[ENTRY_NEW]] ], [ [[INC5_3:%.*]], [[FOR_LATCH:%.*]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 4, [[ENTRY_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_LATCH]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ]
 ; CHECK-NEXT:    [[INC5:%.*]] = add nuw nsw i32 [[INC5_SINK9]], 1
-; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub nuw nsw i32 [[NITER]], 1
+; CHECK-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i32 [[NITER]], 1
 ; CHECK-NEXT:    [[INC5_1:%.*]] = add nuw nsw i32 [[INC5]], 1
-; CHECK-NEXT:    [[NITER_NSUB_1:%.*]] = sub nuw nsw i32 [[NITER_NSUB]], 1
+; CHECK-NEXT:    [[NITER_NEXT_1:%.*]] = add nuw nsw i32 [[NITER_NEXT]], 1
 ; CHECK-NEXT:    [[INC5_2:%.*]] = add nuw nsw i32 [[INC5_1]], 1
-; CHECK-NEXT:    [[NITER_NSUB_2:%.*]] = sub nuw nsw i32 [[NITER_NSUB_1]], 1
+; CHECK-NEXT:    [[NITER_NEXT_2:%.*]] = add nuw nsw i32 [[NITER_NEXT_1]], 1
 ; CHECK-NEXT:    [[INC5_3]] = add nuw nsw i32 [[INC5_2]], 1
-; CHECK-NEXT:    [[NITER_NSUB_3]] = sub nuw nsw i32 [[NITER_NSUB_2]], 1
+; CHECK-NEXT:    [[NITER_NEXT_3]] = add nuw nsw i32 [[NITER_NEXT_2]], 1
 ; CHECK-NEXT:    br label [[FOR_INNER:%.*]]
 ; CHECK:       for.inner:
 ; CHECK-NEXT:    [[INC_SINK8:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ]
@@ -725,31 +721,31 @@ define void @test7(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    br label [[FOR_OUTER:%.*]]
 ; CHECK:       for.outer:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[ADD_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_PREHEADER_NEW]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_PREHEADER_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_LATCH]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[FOR_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I]]
 ; CHECK-NEXT:    store i32 0, i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD]]
 ; CHECK-NEXT:    store i32 2, i32* [[ARRAYIDX2]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1
+; CHECK-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i32 [[NITER]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD]]
 ; CHECK-NEXT:    store i32 0, i32* [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i32 [[ADD]], 1
 ; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD_1]]
 ; CHECK-NEXT:    store i32 2, i32* [[ARRAYIDX2_1]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[NITER_NSUB_1:%.*]] = sub i32 [[NITER_NSUB]], 1
+; CHECK-NEXT:    [[NITER_NEXT_1:%.*]] = add nuw nsw i32 [[NITER_NEXT]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD_1]]
 ; CHECK-NEXT:    store i32 0, i32* [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD_2:%.*]] = add nuw nsw i32 [[ADD_1]], 1
 ; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD_2]]
 ; CHECK-NEXT:    store i32 2, i32* [[ARRAYIDX2_2]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[NITER_NSUB_2:%.*]] = sub i32 [[NITER_NSUB_1]], 1
+; CHECK-NEXT:    [[NITER_NEXT_2:%.*]] = add nuw nsw i32 [[NITER_NEXT_1]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD_2]]
 ; CHECK-NEXT:    store i32 0, i32* [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD_3]] = add nuw i32 [[ADD_2]], 1
 ; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD_3]]
 ; CHECK-NEXT:    store i32 2, i32* [[ARRAYIDX2_3]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[NITER_NSUB_3]] = sub i32 [[NITER_NSUB_2]], 1
+; CHECK-NEXT:    [[NITER_NEXT_3]] = add i32 [[NITER_NEXT_2]], 1
 ; CHECK-NEXT:    br label [[FOR_INNER:%.*]]
 ; CHECK:       for.latch:
 ; CHECK-NEXT:    [[ADD9_LCSSA:%.*]] = phi i32 [ [[ADD9:%.*]], [[FOR_INNER]] ]
@@ -760,7 +756,7 @@ define void @test7(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_1]], i32* [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_2]], i32* [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_3]], i32* [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NSUB_3]], 0
+; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       for.inner:
 ; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9]], [[FOR_INNER]] ]
@@ -817,8 +813,7 @@ define void @test7(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK:       for.latch.epil:
 ; CHECK-NEXT:    [[ADD9_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD9_EPIL]], [[FOR_INNER_EPIL]] ]
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_EPIL]], i32* [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
+; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]]
 ; CHECK:       for.outer.epil.1:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD_EPIL]]
@@ -839,8 +834,7 @@ define void @test7(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK:       for.latch.epil.1:
 ; CHECK-NEXT:    [[ADD9_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD9_EPIL_1]], [[FOR_INNER_EPIL_1]] ]
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_EPIL_1]], i32* [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0
+; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
 ; CHECK:       for.outer.epil.2:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD_EPIL_1]]
@@ -928,31 +922,31 @@ define void @test8(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    br label [[FOR_OUTER:%.*]]
 ; CHECK:       for.outer:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[ADD_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_OUTEST_NEW]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_OUTEST_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_LATCH]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[FOR_OUTEST_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I]]
 ; CHECK-NEXT:    store i32 0, i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD]]
 ; CHECK-NEXT:    store i32 2, i32* [[ARRAYIDX6]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1
+; CHECK-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i32 [[NITER]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD]]
 ; CHECK-NEXT:    store i32 0, i32* [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i32 [[ADD]], 1
 ; CHECK-NEXT:    [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD_1]]
 ; CHECK-NEXT:    store i32 2, i32* [[ARRAYIDX6_1]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[NITER_NSUB_1:%.*]] = sub i32 [[NITER_NSUB]], 1
+; CHECK-NEXT:    [[NITER_NEXT_1:%.*]] = add nuw nsw i32 [[NITER_NEXT]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD_1]]
 ; CHECK-NEXT:    store i32 0, i32* [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD_2:%.*]] = add nuw nsw i32 [[ADD_1]], 1
 ; CHECK-NEXT:    [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD_2]]
 ; CHECK-NEXT:    store i32 2, i32* [[ARRAYIDX6_2]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[NITER_NSUB_2:%.*]] = sub i32 [[NITER_NSUB_1]], 1
+; CHECK-NEXT:    [[NITER_NEXT_2:%.*]] = add nuw nsw i32 [[NITER_NEXT_1]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD_2]]
 ; CHECK-NEXT:    store i32 0, i32* [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD_3]] = add nuw i32 [[ADD_2]], 1
 ; CHECK-NEXT:    [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD_3]]
 ; CHECK-NEXT:    store i32 2, i32* [[ARRAYIDX6_3]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[NITER_NSUB_3]] = sub i32 [[NITER_NSUB_2]], 1
+; CHECK-NEXT:    [[NITER_NEXT_3]] = add i32 [[NITER_NEXT_2]], 1
 ; CHECK-NEXT:    br label [[FOR_INNER:%.*]]
 ; CHECK:       for.inner:
 ; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9:%.*]], [[FOR_INNER]] ]
@@ -990,7 +984,7 @@ define void @test8(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_1]], i32* [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_2]], i32* [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_3]], i32* [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NSUB_3]], 0
+; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_CLEANUP_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       for.cleanup.unr-lcssa.loopexit:
 ; CHECK-NEXT:    [[I_UNR_PH:%.*]] = phi i32 [ [[ADD_3]], [[FOR_LATCH]] ]
@@ -1020,8 +1014,7 @@ define void @test8(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK:       for.latch.epil:
 ; CHECK-NEXT:    [[ADD9_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD9_EPIL]], [[FOR_INNER_EPIL]] ]
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_EPIL]], i32* [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
+; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_CLEANUP_EPILOG_LCSSA:%.*]]
 ; CHECK:       for.outer.epil.1:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD_EPIL]]
@@ -1042,8 +1035,7 @@ define void @test8(i32 %I, i32 %E, i32* noalias nocapture %A, i32* noalias nocap
 ; CHECK:       for.latch.epil.1:
 ; CHECK-NEXT:    [[ADD9_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD9_EPIL_1]], [[FOR_INNER_EPIL_1]] ]
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_EPIL_1]], i32* [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0
+; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_CLEANUP_EPILOG_LCSSA]]
 ; CHECK:       for.outer.epil.2:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD_EPIL_1]]
@@ -1141,15 +1133,15 @@ define void @test9(i32 %I, i32 %E, i32* nocapture %A, i16* nocapture readonly %B
 ; CHECK-NEXT:    br label [[FOR_OUTER:%.*]]
 ; CHECK:       for.outer:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[ADD8_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_OUTER_PREHEADER_NEW]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_LATCH]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_LATCH]] ]
 ; CHECK-NEXT:    [[ADD8:%.*]] = add nuw nsw i32 [[I]], 1
-; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1
+; CHECK-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i32 [[NITER]], 1
 ; CHECK-NEXT:    [[ADD8_1:%.*]] = add nuw nsw i32 [[ADD8]], 1
-; CHECK-NEXT:    [[NITER_NSUB_1:%.*]] = sub i32 [[NITER_NSUB]], 1
+; CHECK-NEXT:    [[NITER_NEXT_1:%.*]] = add nuw nsw i32 [[NITER_NEXT]], 1
 ; CHECK-NEXT:    [[ADD8_2:%.*]] = add nuw nsw i32 [[ADD8_1]], 1
-; CHECK-NEXT:    [[NITER_NSUB_2:%.*]] = sub i32 [[NITER_NSUB_1]], 1
+; CHECK-NEXT:    [[NITER_NEXT_2:%.*]] = add nuw nsw i32 [[NITER_NEXT_1]], 1
 ; CHECK-NEXT:    [[ADD8_3]] = add nuw i32 [[ADD8_2]], 1
-; CHECK-NEXT:    [[NITER_NSUB_3]] = sub i32 [[NITER_NSUB_2]], 1
+; CHECK-NEXT:    [[NITER_NEXT_3]] = add i32 [[NITER_NEXT_2]], 1
 ; CHECK-NEXT:    br label [[FOR_INNER:%.*]]
 ; CHECK:       for.inner:
 ; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ]
@@ -1195,7 +1187,7 @@ define void @test9(i32 %I, i32 %E, i32* nocapture %A, i16* nocapture readonly %B
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_2]], i32* [[ARRAYIDX6_2]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_2]]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_3]], i32* [[ARRAYIDX6_3]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NSUB_3]], 0
+; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       for.end.loopexit.unr-lcssa.loopexit:
 ; CHECK-NEXT:    [[I_UNR_PH:%.*]] = phi i32 [ [[ADD8_3]], [[FOR_LATCH]] ]
@@ -1223,8 +1215,7 @@ define void @test9(i32 %I, i32 %E, i32* nocapture %A, i16* nocapture readonly %B
 ; CHECK-NEXT:    [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_UNR]]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL]], i32* [[ARRAYIDX6_EPIL]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD8_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1
-; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
+; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]]
 ; CHECK:       for.outer.epil.1:
 ; CHECK-NEXT:    br label [[FOR_INNER_EPIL_1:%.*]]
@@ -1243,8 +1234,7 @@ define void @test9(i32 %I, i32 %E, i32* nocapture %A, i16* nocapture readonly %B
 ; CHECK-NEXT:    [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_EPIL]]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL_1]], i32* [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD8_EPIL_1:%.*]] = add nuw i32 [[ADD8_EPIL]], 1
-; CHECK-NEXT:    [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0
+; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
 ; CHECK:       for.outer.epil.2:
 ; CHECK-NEXT:    br label [[FOR_INNER_EPIL_2:%.*]]
@@ -1325,15 +1315,15 @@ define signext i16 @test10(i32 %k) #0 {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[STOREMERGE82:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INC25_3:%.*]], [[FOR_INC24:%.*]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 4, [[ENTRY_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_INC24]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_INC24]] ]
 ; CHECK-NEXT:    [[INC25:%.*]] = add nuw nsw i64 [[STOREMERGE82]], 1
-; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub nuw nsw i64 [[NITER]], 1
+; CHECK-NEXT:    [[NITER_NEXT:%.*]] = add nuw nsw i64 [[NITER]], 1
 ; CHECK-NEXT:    [[INC25_1:%.*]] = add nuw nsw i64 [[INC25]], 1
-; CHECK-NEXT:    [[NITER_NSUB_1:%.*]] = sub nuw nsw i64 [[NITER_NSUB]], 1
+; CHECK-NEXT:    [[NITER_NEXT_1:%.*]] = add nuw nsw i64 [[NITER_NEXT]], 1
 ; CHECK-NEXT:    [[INC25_2:%.*]] = add nuw nsw i64 [[INC25_1]], 1
-; CHECK-NEXT:    [[NITER_NSUB_2:%.*]] = sub nuw nsw i64 [[NITER_NSUB_1]], 1
+; CHECK-NEXT:    [[NITER_NEXT_2:%.*]] = add nuw nsw i64 [[NITER_NEXT_1]], 1
 ; CHECK-NEXT:    [[INC25_3]] = add nuw nsw i64 [[INC25_2]], 1
-; CHECK-NEXT:    [[NITER_NSUB_3]] = sub nuw nsw i64 [[NITER_NSUB_2]], 1
+; CHECK-NEXT:    [[NITER_NEXT_3]] = add nuw nsw i64 [[NITER_NEXT_2]], 1
 ; CHECK-NEXT:    br label [[FOR_BODY2:%.*]]
 ; CHECK:       for.body2:
 ; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi i64 [ 4, [[FOR_BODY]] ], [ [[DEC:%.*]], [[FOR_INC21_3:%.*]] ]

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index 254a4c301483..f85a8c66ab9d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -40,7 +40,7 @@ define void @fp_iv_loop1(float* noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC:       vector.body:
 ; AUTO_VEC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[VEC_IND:%.*]] = phi <8 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00, float 3.000000e+00, float 3.500000e+00, float 4.000000e+00, float 4.500000e+00>, [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT_3:%.*]], [[VECTOR_BODY]] ]
-; AUTO_VEC-NEXT:    [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[VECTOR_PH_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[VECTOR_BODY]] ]
+; AUTO_VEC-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[VECTOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[STEP_ADD:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01>
@@ -109,8 +109,8 @@ define void @fp_iv_loop1(float* noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD3_3]], <8 x float>* [[TMP36]], align 4
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT_3]] = add nuw i64 [[INDEX]], 128
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT_3]] = fadd fast <8 x float> [[VEC_IND]], <float 6.400000e+01, float 6.400000e+01, float 6.400000e+01, float 6.400000e+01, float 6.400000e+01, float 6.400000e+01, float 6.400000e+01, float 6.400000e+01>
-; AUTO_VEC-NEXT:    [[NITER_NSUB_3]] = add i64 [[NITER]], -4
-; AUTO_VEC-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NSUB_3]], 0
+; AUTO_VEC-NEXT:    [[NITER_NEXT_3]] = add i64 [[NITER]], 4
+; AUTO_VEC-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
 ; AUTO_VEC-NEXT:    br i1 [[NITER_NCMP_3]], label [[MIDDLE_BLOCK_UNR_LCSSA]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; AUTO_VEC:       middle.block.unr-lcssa:
 ; AUTO_VEC-NEXT:    [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT_3]], [[VECTOR_BODY]] ]
@@ -120,7 +120,7 @@ define void @fp_iv_loop1(float* noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC:       vector.body.epil:
 ; AUTO_VEC-NEXT:    [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[INDEX_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ]
 ; AUTO_VEC-NEXT:    [[VEC_IND_EPIL:%.*]] = phi <8 x float> [ [[VEC_IND_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[VEC_IND_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ]
-; AUTO_VEC-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[XTRAITER]], [[MIDDLE_BLOCK_UNR_LCSSA]] ]
+; AUTO_VEC-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[VECTOR_BODY_EPIL]] ], [ 0, [[MIDDLE_BLOCK_UNR_LCSSA]] ]
 ; AUTO_VEC-NEXT:    [[STEP_ADD_EPIL:%.*]] = fadd fast <8 x float> [[VEC_IND_EPIL]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2_EPIL:%.*]] = fadd fast <8 x float> [[VEC_IND_EPIL]], <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3_EPIL:%.*]] = fadd fast <8 x float> [[VEC_IND_EPIL]], <float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01>
@@ -138,8 +138,8 @@ define void @fp_iv_loop1(float* noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD3_EPIL]], <8 x float>* [[TMP44]], align 4
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT_EPIL]] = add nuw i64 [[INDEX_EPIL]], 32
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT_EPIL]] = fadd fast <8 x float> [[VEC_IND_EPIL]], <float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01>
-; AUTO_VEC-NEXT:    [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1
-; AUTO_VEC-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0
+; AUTO_VEC-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; AUTO_VEC-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
 ; AUTO_VEC-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY_EPIL]], !llvm.loop [[LOOP2:![0-9]+]]
 ; AUTO_VEC:       middle.block:
 ; AUTO_VEC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[ZEXT]]
@@ -208,7 +208,7 @@ define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) {
 ; AUTO_VEC:       for.body:
 ; AUTO_VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[X_06:%.*]] = phi float [ 1.000000e+00, [[FOR_BODY_PREHEADER_NEW]] ], [ [[CONV1_7:%.*]], [[FOR_BODY]] ]
-; AUTO_VEC-NEXT:    [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[FOR_BODY]] ]
+; AUTO_VEC-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDVARS_IV]]
 ; AUTO_VEC-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1:%.*]] = fadd float [[X_06]], 5.000000e-01
@@ -241,8 +241,8 @@ define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) {
 ; AUTO_VEC-NEXT:    store float [[CONV1_6]], float* [[ARRAYIDX_7]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_7]] = fadd float [[CONV1_6]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
-; AUTO_VEC-NEXT:    [[NITER_NSUB_7]] = add i64 [[NITER]], -8
-; AUTO_VEC-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0
+; AUTO_VEC-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
+; AUTO_VEC-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
 ; AUTO_VEC-NEXT:    br i1 [[NITER_NCMP_7]], label [[FOR_END_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY]]
 ; AUTO_VEC:       for.end.loopexit.unr-lcssa:
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT_7]], [[FOR_BODY]] ]
@@ -252,13 +252,13 @@ define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) {
 ; AUTO_VEC:       for.body.epil:
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[INDVARS_IV_UNR]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
 ; AUTO_VEC-NEXT:    [[X_06_EPIL:%.*]] = phi float [ [[CONV1_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[X_06_UNR]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
-; AUTO_VEC-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[FOR_BODY_EPIL]] ], [ [[XTRAITER]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
+; AUTO_VEC-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[FOR_BODY_EPIL]] ], [ 0, [[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
 ; AUTO_VEC-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_EPIL]]
 ; AUTO_VEC-NEXT:    store float [[X_06_EPIL]], float* [[ARRAYIDX_EPIL]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_EPIL]] = fadd float [[X_06_EPIL]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1
-; AUTO_VEC-NEXT:    [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1
-; AUTO_VEC-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0
+; AUTO_VEC-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; AUTO_VEC-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
 ; AUTO_VEC-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_END]], label [[FOR_BODY_EPIL]], !llvm.loop [[LOOP6:![0-9]+]]
 ; AUTO_VEC:       for.end:
 ; AUTO_VEC-NEXT:    ret void
@@ -310,7 +310,7 @@ define double @external_use_with_fast_math(double* %a, i64 %n) {
 ; AUTO_VEC:       vector.body:
 ; AUTO_VEC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[VEC_IND:%.*]] = phi <4 x double> [ <double 0.000000e+00, double 3.000000e+00, double 6.000000e+00, double 9.000000e+00>, [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT_3:%.*]], [[VECTOR_BODY]] ]
-; AUTO_VEC-NEXT:    [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[VECTOR_PH_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[VECTOR_BODY]] ]
+; AUTO_VEC-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[VECTOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[STEP_ADD:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 1.200000e+01, double 1.200000e+01, double 1.200000e+01, double 1.200000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 2.400000e+01, double 2.400000e+01, double 2.400000e+01, double 2.400000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 3.600000e+01, double 3.600000e+01, double 3.600000e+01, double 3.600000e+01>
@@ -379,8 +379,8 @@ define double @external_use_with_fast_math(double* %a, i64 %n) {
 ; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD3_3]], <4 x double>* [[TMP36]], align 8
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT_3]] = add nuw i64 [[INDEX]], 64
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT_3]] = fadd fast <4 x double> [[VEC_IND]], <double 1.920000e+02, double 1.920000e+02, double 1.920000e+02, double 1.920000e+02>
-; AUTO_VEC-NEXT:    [[NITER_NSUB_3]] = add i64 [[NITER]], -4
-; AUTO_VEC-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NSUB_3]], 0
+; AUTO_VEC-NEXT:    [[NITER_NEXT_3]] = add i64 [[NITER]], 4
+; AUTO_VEC-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
 ; AUTO_VEC-NEXT:    br i1 [[NITER_NCMP_3]], label [[MIDDLE_BLOCK_UNR_LCSSA]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; AUTO_VEC:       middle.block.unr-lcssa:
 ; AUTO_VEC-NEXT:    [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT_3]], [[VECTOR_BODY]] ]
@@ -390,7 +390,7 @@ define double @external_use_with_fast_math(double* %a, i64 %n) {
 ; AUTO_VEC:       vector.body.epil:
 ; AUTO_VEC-NEXT:    [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[INDEX_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ]
 ; AUTO_VEC-NEXT:    [[VEC_IND_EPIL:%.*]] = phi <4 x double> [ [[VEC_IND_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[VEC_IND_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ]
-; AUTO_VEC-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[XTRAITER]], [[MIDDLE_BLOCK_UNR_LCSSA]] ]
+; AUTO_VEC-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[VECTOR_BODY_EPIL]] ], [ 0, [[MIDDLE_BLOCK_UNR_LCSSA]] ]
 ; AUTO_VEC-NEXT:    [[STEP_ADD_EPIL:%.*]] = fadd fast <4 x double> [[VEC_IND_EPIL]], <double 1.200000e+01, double 1.200000e+01, double 1.200000e+01, double 1.200000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2_EPIL:%.*]] = fadd fast <4 x double> [[VEC_IND_EPIL]], <double 2.400000e+01, double 2.400000e+01, double 2.400000e+01, double 2.400000e+01>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3_EPIL:%.*]] = fadd fast <4 x double> [[VEC_IND_EPIL]], <double 3.600000e+01, double 3.600000e+01, double 3.600000e+01, double 3.600000e+01>
@@ -408,8 +408,8 @@ define double @external_use_with_fast_math(double* %a, i64 %n) {
 ; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD3_EPIL]], <4 x double>* [[TMP44]], align 8
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT_EPIL]] = add nuw i64 [[INDEX_EPIL]], 16
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT_EPIL]] = fadd fast <4 x double> [[VEC_IND_EPIL]], <double 4.800000e+01, double 4.800000e+01, double 4.800000e+01, double 4.800000e+01>
-; AUTO_VEC-NEXT:    [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1
-; AUTO_VEC-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0
+; AUTO_VEC-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; AUTO_VEC-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
 ; AUTO_VEC-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY_EPIL]], !llvm.loop [[LOOP8:![0-9]+]]
 ; AUTO_VEC:       middle.block:
 ; AUTO_VEC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
@@ -462,7 +462,7 @@ define double @external_use_without_fast_math(double* %a, i64 %n) {
 ; AUTO_VEC:       for.body:
 ; AUTO_VEC-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[I_NEXT_7:%.*]], [[FOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[J:%.*]] = phi double [ 0.000000e+00, [[ENTRY_NEW]] ], [ [[J_NEXT_7:%.*]], [[FOR_BODY]] ]
-; AUTO_VEC-NEXT:    [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[ENTRY_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[FOR_BODY]] ]
+; AUTO_VEC-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[T0:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[I]]
 ; AUTO_VEC-NEXT:    store double [[J]], double* [[T0]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT:%.*]] = or i64 [[I]], 1
@@ -495,8 +495,8 @@ define double @external_use_without_fast_math(double* %a, i64 %n) {
 ; AUTO_VEC-NEXT:    store double [[J_NEXT_6]], double* [[T0_7]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT_7]] = add nuw nsw i64 [[I]], 8
 ; AUTO_VEC-NEXT:    [[J_NEXT_7]] = fadd double [[J_NEXT_6]], 3.000000e+00
-; AUTO_VEC-NEXT:    [[NITER_NSUB_7]] = add i64 [[NITER]], -8
-; AUTO_VEC-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0
+; AUTO_VEC-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
+; AUTO_VEC-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
 ; AUTO_VEC-NEXT:    br i1 [[NITER_NCMP_7]], label [[FOR_END_UNR_LCSSA]], label [[FOR_BODY]]
 ; AUTO_VEC:       for.end.unr-lcssa:
 ; AUTO_VEC-NEXT:    [[J_LCSSA_PH:%.*]] = phi double [ undef, [[ENTRY:%.*]] ], [ [[J_NEXT_6]], [[FOR_BODY]] ]
@@ -507,13 +507,13 @@ define double @external_use_without_fast_math(double* %a, i64 %n) {
 ; AUTO_VEC:       for.body.epil:
 ; AUTO_VEC-NEXT:    [[I_EPIL:%.*]] = phi i64 [ [[I_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[I_UNR]], [[FOR_END_UNR_LCSSA]] ]
 ; AUTO_VEC-NEXT:    [[J_EPIL:%.*]] = phi double [ [[J_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[J_UNR]], [[FOR_END_UNR_LCSSA]] ]
-; AUTO_VEC-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[FOR_BODY_EPIL]] ], [ [[XTRAITER]], [[FOR_END_UNR_LCSSA]] ]
+; AUTO_VEC-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[FOR_BODY_EPIL]] ], [ 0, [[FOR_END_UNR_LCSSA]] ]
 ; AUTO_VEC-NEXT:    [[T0_EPIL:%.*]] = getelementptr double, double* [[A]], i64 [[I_EPIL]]
 ; AUTO_VEC-NEXT:    store double [[J_EPIL]], double* [[T0_EPIL]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT_EPIL]] = add nuw nsw i64 [[I_EPIL]], 1
 ; AUTO_VEC-NEXT:    [[J_NEXT_EPIL]] = fadd double [[J_EPIL]], 3.000000e+00
-; AUTO_VEC-NEXT:    [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1
-; AUTO_VEC-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0
+; AUTO_VEC-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; AUTO_VEC-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
 ; AUTO_VEC-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_END]], label [[FOR_BODY_EPIL]], !llvm.loop [[LOOP10:![0-9]+]]
 ; AUTO_VEC:       for.end:
 ; AUTO_VEC-NEXT:    [[J_LCSSA:%.*]] = phi double [ [[J_LCSSA_PH]], [[FOR_END_UNR_LCSSA]] ], [ [[J_EPIL]], [[FOR_BODY_EPIL]] ]
@@ -571,7 +571,7 @@ define void @fadd_reassoc_FMF(float* nocapture %p, i32 %N) {
 ; AUTO_VEC:       vector.body:
 ; AUTO_VEC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[VEC_IND:%.*]] = phi <8 x float> [ <float 1.000000e+00, float 4.300000e+01, float 8.500000e+01, float 1.270000e+02, float 1.690000e+02, float 2.110000e+02, float 2.530000e+02, float 2.950000e+02>, [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT_1:%.*]], [[VECTOR_BODY]] ]
-; AUTO_VEC-NEXT:    [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[VECTOR_PH_NEW]] ], [ [[NITER_NSUB_1:%.*]], [[VECTOR_BODY]] ]
+; AUTO_VEC-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[VECTOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[STEP_ADD:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
 ; AUTO_VEC-NEXT:    [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
 ; AUTO_VEC-NEXT:    [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
@@ -630,8 +630,8 @@ define void @fadd_reassoc_FMF(float* nocapture %p, i32 %N) {
 ; AUTO_VEC-NEXT:    store <8 x float> [[TMP33]], <8 x float>* [[TMP37]], align 4
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT_1]] = add nuw i64 [[INDEX]], 64
 ; AUTO_VEC-NEXT:    [[VEC_IND_NEXT_1]] = fadd reassoc <8 x float> [[STEP_ADD3_1]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
-; AUTO_VEC-NEXT:    [[NITER_NSUB_1]] = add i64 [[NITER]], -2
-; AUTO_VEC-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NSUB_1]], 0
+; AUTO_VEC-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
+; AUTO_VEC-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
 ; AUTO_VEC-NEXT:    br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; AUTO_VEC:       middle.block.unr-lcssa:
 ; AUTO_VEC-NEXT:    [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT_1]], [[VECTOR_BODY]] ]

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
index 1ab06e8ba51b..77ab42885ff3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
@@ -1450,7 +1450,7 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl
 ; AVX512:       vector.body:
 ; AVX512-NEXT:    [[POINTER_PHI:%.*]] = phi float* [ [[DEST]], [[VECTOR_PH_NEW]] ], [ [[PTR_IND_7:%.*]], [[VECTOR_BODY]] ]
 ; AVX512-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_7:%.*]], [[VECTOR_BODY]] ]
-; AVX512-NEXT:    [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[VECTOR_PH_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[VECTOR_BODY]] ]
+; AVX512-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[VECTOR_BODY]] ]
 ; AVX512-NEXT:    [[NEXT_GEP:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX]]
 ; AVX512-NEXT:    [[TMP17:%.*]] = getelementptr float, float* [[POINTER_PHI]], <16 x i64> <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>
 ; AVX512-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP]], i64 [[IDXPROM]]
@@ -1547,8 +1547,8 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl
 ; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_7]], <16 x float*> [[TMP56]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>), !alias.scope !5, !noalias !7
 ; AVX512-NEXT:    [[INDEX_NEXT_7]] = add nuw i64 [[INDEX]], 128
 ; AVX512-NEXT:    [[PTR_IND_7]] = getelementptr float, float* [[POINTER_PHI]], i64 2048
-; AVX512-NEXT:    [[NITER_NSUB_7]] = add i64 [[NITER]], -8
-; AVX512-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0
+; AVX512-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
+; AVX512-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
 ; AVX512-NEXT:    br i1 [[NITER_NCMP_7]], label [[MIDDLE_BLOCK_UNR_LCSSA]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; AVX512:       middle.block.unr-lcssa:
 ; AVX512-NEXT:    [[POINTER_PHI_UNR:%.*]] = phi float* [ [[DEST]], [[VECTOR_PH]] ], [ [[PTR_IND_7]], [[VECTOR_BODY]] ]
@@ -1558,7 +1558,7 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl
 ; AVX512:       vector.body.epil:
 ; AVX512-NEXT:    [[POINTER_PHI_EPIL:%.*]] = phi float* [ [[PTR_IND_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[POINTER_PHI_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ]
 ; AVX512-NEXT:    [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[INDEX_UNR]], [[MIDDLE_BLOCK_UNR_LCSSA]] ]
-; AVX512-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[VECTOR_BODY_EPIL]] ], [ [[XTRAITER]], [[MIDDLE_BLOCK_UNR_LCSSA]] ]
+; AVX512-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[VECTOR_BODY_EPIL]] ], [ 0, [[MIDDLE_BLOCK_UNR_LCSSA]] ]
 ; AVX512-NEXT:    [[NEXT_GEP_EPIL:%.*]] = getelementptr float, float* [[PTR]], i64 [[INDEX_EPIL]]
 ; AVX512-NEXT:    [[TMP57:%.*]] = getelementptr float, float* [[POINTER_PHI_EPIL]], <16 x i64> <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>
 ; AVX512-NEXT:    [[TMP58:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP_EPIL]], i64 [[IDXPROM]]
@@ -1571,8 +1571,8 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly %ptr, fl
 ; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD15_EPIL]], <16 x float*> [[TMP61]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>), !alias.scope !5, !noalias !7
 ; AVX512-NEXT:    [[INDEX_NEXT_EPIL]] = add nuw i64 [[INDEX_EPIL]], 16
 ; AVX512-NEXT:    [[PTR_IND_EPIL]] = getelementptr float, float* [[POINTER_PHI_EPIL]], i64 256
-; AVX512-NEXT:    [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1
-; AVX512-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0
+; AVX512-NEXT:    [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
+; AVX512-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
 ; AVX512-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY_EPIL]], !llvm.loop [[LOOP11:![0-9]+]]
 ; AVX512:       middle.block:
 ; AVX512-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
index 70238d98b2cd..ba9d8c467c2e 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
@@ -90,15 +90,15 @@ define void @vdiv(double* %x, double* %y, double %a, i32 %N) #0 {
 ; CHECK-NEXT:    br label [[FOR_BODY_PROL:%.*]]
 ; CHECK:       for.body.prol:
 ; CHECK-NEXT:    [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ]
-; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ], [ [[XTRAITER]], [[FOR_BODY_PROL_PREHEADER]] ]
+; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], [[FOR_BODY_PROL]] ], [ 0, [[FOR_BODY_PROL_PREHEADER]] ]
 ; CHECK-NEXT:    [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_PROL]]
 ; CHECK-NEXT:    [[T0_PROL:%.*]] = load double, double* [[ARRAYIDX_PROL]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[TMP28:%.*]] = fmul fast double [[T0_PROL]], [[TMP27]]
 ; CHECK-NEXT:    [[ARRAYIDX2_PROL:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_PROL]]
 ; CHECK-NEXT:    store double [[TMP28]], double* [[ARRAYIDX2_PROL]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
-; CHECK-NEXT:    [[PROL_ITER_SUB]] = add i64 [[PROL_ITER]], -1
-; CHECK-NEXT:    [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_SUB]], 0
+; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
 ; CHECK-NEXT:    br i1 [[PROL_ITER_CMP_NOT]], label [[FOR_BODY_PROL_LOOPEXIT]], label [[FOR_BODY_PROL]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       for.body.prol.loopexit:
 ; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER18]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ]


        


More information about the llvm-commits mailing list