[llvm] f453e23 - Autogen a bunch of unrolling tests for ease of update

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 12 10:34:56 PST 2021


Author: Philip Reames
Date: 2021-11-12T10:34:50-08:00
New Revision: f453e23e67e260b375ea222dab2483c1663aa5c9

URL: https://github.com/llvm/llvm-project/commit/f453e23e67e260b375ea222dab2483c1663aa5c9
DIFF: https://github.com/llvm/llvm-project/commit/f453e23e67e260b375ea222dab2483c1663aa5c9.diff

LOG: Autogen a bunch of unrolling tests for ease of update

Added: 
    

Modified: 
    llvm/test/DebugInfo/unrolled-loop-remainder.ll
    llvm/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
    llvm/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll
    llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll
    llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
    llvm/test/Transforms/LoopUnroll/partially-unroll-unconditional-latch.ll
    llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
    llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/DebugInfo/unrolled-loop-remainder.ll b/llvm/test/DebugInfo/unrolled-loop-remainder.ll
index 1256063b5c0db..83c30dec780d6 100644
--- a/llvm/test/DebugInfo/unrolled-loop-remainder.ll
+++ b/llvm/test/DebugInfo/unrolled-loop-remainder.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -loop-unroll -unroll-runtime -unroll-allow-remainder -unroll-count=4 -unroll-remainder -S %s -o - | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -9,17 +10,100 @@ target triple = "x86_64-unknown-linux-gnu"
 ; used to cause an assert, but also test that the unrolled backwards branches
 ; have the same DILocation.
 
-; CHECK-LABEL: func_c
-; CHECK-LABEL: for.body.lr.ph:
-; CHECK: br i1 %[[CMP0:.*]], label %[[PRE:.*]], label %for.body.prol.loopexit, !dbg !24
-; CHECK-LABEL: for.body:
-; CHECK: br i1 %[[CMP1:.*]], label %[[CRIT_EDGE:.*]], label %for.body, !dbg !24, !llvm.loop !30
-; CHECK-LABEL: for.cond.for.end_crit_edge:
-; CHECK: br label %for.end, !dbg !24
-; CHECK-LABEL: for.body.prol.1:
-; CHECK: br i1 %[[CMP2:.*]], label %for.body.prol.2, label %[[EXIT:.*]], !dbg !24
-; CHECK-LABEL: for.body.prol.2:
 define i32 @func_c() local_unnamed_addr #0 !dbg !14 {
+;
+; CHECK-LABEL: @func_c(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTPR:%.*]] = load i32, i32* @b, align 4, !dbg [[DBG17:![0-9]+]], !tbaa [[TBAA20:![0-9]+]]
+; CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp eq i32 [[DOTPR]], 0, !dbg [[DBG24:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TOBOOL1]], label [[FOR_END:%.*]], label [[FOR_BODY_LR_PH:%.*]], !dbg [[DBG24]]
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[A_PROMOTED:%.*]] = load i32*, i32** @a, align 8, !dbg [[DBG25:![0-9]+]], !tbaa [[TBAA26:![0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 -2, [[DOTPR]], !dbg [[DBG24]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], -2, !dbg [[DBG24]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[DOTPR]], [[TMP1]], !dbg [[DBG24]]
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP0]], 1, !dbg [[DBG24]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add nuw i32 [[TMP3]], 1, !dbg [[DBG24]]
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[TMP4]], 3, !dbg [[DBG24]]
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0, !dbg [[DBG24]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]], !dbg [[DBG24]]
+; CHECK:       for.body.prol.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY_PROL:%.*]], !dbg [[DBG24]]
+; CHECK:       for.body.prol:
+; CHECK-NEXT:    [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i32, i32* [[A_PROMOTED]], i64 1, !dbg [[DBG28:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_PROL]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT:    [[CONV_PROL:%.*]] = sext i32 [[TMP5]] to i64, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[CONV_PROL]] to i32*, !dbg [[DBG28]]
+; CHECK-NEXT:    [[ADD_PROL:%.*]] = add nsw i32 [[DOTPR]], 2, !dbg [[DBG29:![0-9]+]]
+; CHECK-NEXT:    [[PROL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1, !dbg [[DBG24]]
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_SUB]], 0, !dbg [[DBG24]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL_1:%.*]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !dbg [[DBG24]]
+; CHECK:       for.body.prol.loopexit.unr-lcssa:
+; CHECK-NEXT:    [[DOTLCSSA_UNR_PH:%.*]] = phi i32* [ [[TMP6]], [[FOR_BODY_PROL]] ], [ [[TMP20:%.*]], [[FOR_BODY_PROL_1]] ], [ [[TMP22:%.*]], [[FOR_BODY_PROL_2:%.*]] ]
+; CHECK-NEXT:    [[DOTUNR_PH:%.*]] = phi i32* [ [[TMP6]], [[FOR_BODY_PROL]] ], [ [[TMP20]], [[FOR_BODY_PROL_1]] ], [ [[TMP22]], [[FOR_BODY_PROL_2]] ]
+; CHECK-NEXT:    [[DOTUNR1_PH:%.*]] = phi i32 [ [[ADD_PROL]], [[FOR_BODY_PROL]] ], [ [[ADD_PROL_1:%.*]], [[FOR_BODY_PROL_1]] ], [ [[ADD_PROL_2:%.*]], [[FOR_BODY_PROL_2]] ]
+; CHECK-NEXT:    br label [[FOR_BODY_PROL_LOOPEXIT]], !dbg [[DBG24]]
+; CHECK:       for.body.prol.loopexit:
+; CHECK-NEXT:    [[DOTLCSSA_UNR:%.*]] = phi i32* [ undef, [[FOR_BODY_LR_PH]] ], [ [[DOTLCSSA_UNR_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[DOTUNR:%.*]] = phi i32* [ [[A_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[DOTUNR_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[DOTUNR1:%.*]] = phi i32 [ [[DOTPR]], [[FOR_BODY_LR_PH]] ], [ [[DOTUNR1_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[TMP3]], 3, !dbg [[DBG24]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]], !dbg [[DBG24]]
+; CHECK:       for.body.lr.ph.new:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]], !dbg [[DBG24]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi i32* [ [[DOTUNR]], [[FOR_BODY_LR_PH_NEW]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ], !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ [[DOTUNR1]], [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i64 1, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP11:%.*]] = inttoptr i64 [[CONV]] to i32*, !dbg [[DBG28]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP9]], 2, !dbg [[DBG29]]
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i64 1, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT:    [[CONV_1:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[CONV_1]] to i32*, !dbg [[DBG28]]
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[ADD]], 2, !dbg [[DBG29]]
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 1, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT:    [[CONV_2:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP15:%.*]] = inttoptr i64 [[CONV_2]] to i32*, !dbg [[DBG28]]
+; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[ADD_1]], 2, !dbg [[DBG29]]
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i64 1, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT:    [[CONV_3:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP17]] = inttoptr i64 [[CONV_3]] to i32*, !dbg [[DBG28]]
+; CHECK-NEXT:    [[ADD_3]] = add nsw i32 [[ADD_2]], 2, !dbg [[DBG29]]
+; CHECK-NEXT:    [[TOBOOL_3:%.*]] = icmp eq i32 [[ADD_3]], 0, !dbg [[DBG24]]
+; CHECK-NEXT:    br i1 [[TOBOOL_3]], label [[FOR_COND_FOR_END_CRIT_EDGE_UNR_LCSSA:%.*]], label [[FOR_BODY]], !dbg [[DBG24]], !llvm.loop [[LOOP30:![0-9]+]]
+; CHECK:       for.cond.for.end_crit_edge.unr-lcssa:
+; CHECK-NEXT:    [[DOTLCSSA_PH:%.*]] = phi i32* [ [[TMP17]], [[FOR_BODY]] ]
+; CHECK-NEXT:    br label [[FOR_COND_FOR_END_CRIT_EDGE]], !dbg [[DBG24]]
+; CHECK:       for.cond.for.end_crit_edge:
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32* [ [[DOTLCSSA_UNR]], [[FOR_BODY_PROL_LOOPEXIT]] ], [ [[DOTLCSSA_PH]], [[FOR_COND_FOR_END_CRIT_EDGE_UNR_LCSSA]] ], !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[TMP2]], 2, !dbg [[DBG24]]
+; CHECK-NEXT:    store i32* [[DOTLCSSA]], i32** @a, align 8, !dbg [[DBG25]], !tbaa [[TBAA26]]
+; CHECK-NEXT:    store i32 [[TMP18]], i32* @b, align 4, !dbg [[DBG33:![0-9]+]], !tbaa [[TBAA20]]
+; CHECK-NEXT:    br label [[FOR_END]], !dbg [[DBG24]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret i32 undef, !dbg [[DBG34:![0-9]+]]
+; CHECK:       for.body.prol.1:
+; CHECK-NEXT:    [[ARRAYIDX_PROL_1:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 1, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP19:%.*]] = load i32, i32* [[ARRAYIDX_PROL_1]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT:    [[CONV_PROL_1:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP20]] = inttoptr i64 [[CONV_PROL_1]] to i32*, !dbg [[DBG28]]
+; CHECK-NEXT:    [[ADD_PROL_1]] = add nsw i32 [[ADD_PROL]], 2, !dbg [[DBG29]]
+; CHECK-NEXT:    [[PROL_ITER_SUB_1:%.*]] = sub i32 [[PROL_ITER_SUB]], 1, !dbg [[DBG24]]
+; CHECK-NEXT:    [[PROL_ITER_CMP_1:%.*]] = icmp ne i32 [[PROL_ITER_SUB_1]], 0, !dbg [[DBG24]]
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP_1]], label [[FOR_BODY_PROL_2]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]], !dbg [[DBG24]]
+; CHECK:       for.body.prol.2:
+; CHECK-NEXT:    [[ARRAYIDX_PROL_2:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 1, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[ARRAYIDX_PROL_2]], align 4, !dbg [[DBG28]], !tbaa [[TBAA20]]
+; CHECK-NEXT:    [[CONV_PROL_2:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG28]]
+; CHECK-NEXT:    [[TMP22]] = inttoptr i64 [[CONV_PROL_2]] to i32*, !dbg [[DBG28]]
+; CHECK-NEXT:    [[ADD_PROL_2]] = add nsw i32 [[ADD_PROL_1]], 2, !dbg [[DBG29]]
+; CHECK-NEXT:    br label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]]
+;
 entry:
   %.pr = load i32, i32* @b, align 4, !dbg !17, !tbaa !20
   %tobool1 = icmp eq i32 %.pr, 0, !dbg !24
@@ -53,8 +137,12 @@ for.end:
   ret i32 undef, !dbg !33
 }
 
-; CHECK-LABEL: func_d
 define void @func_d() local_unnamed_addr #1 !dbg !34 {
+;
+; CHECK-LABEL: @func_d(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret void, !dbg [[DBG38:![0-9]+]]
+;
 entry:
   ret void, !dbg !37
 }

diff  --git a/llvm/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll b/llvm/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
index a87b16a28b7c6..3e611430d69ee 100644
--- a/llvm/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
+++ b/llvm/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
@@ -1,14 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -loop-unroll -S -unroll-count=4 | FileCheck %s
 ; Test phi update after partial unroll.
 
 declare i1 @check() nounwind
 
-; CHECK: @test
-; CHECK: if.else:
-; CHECK: if.then.loopexit
-; CHECK: %sub5.lcssa = phi i32 [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ], [ %sub{{.*}}, %if.else{{.*}} ]
-; CHECK: if.else.3
 define void @test1(i32 %i, i32 %j) nounwind uwtable ssp {
+;
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND1:%.*]] = call zeroext i1 @check()
+; CHECK-NEXT:    br i1 [[COND1]], label [[IF_THEN:%.*]], label [[IF_ELSE_LR_PH:%.*]]
+; CHECK:       if.else.lr.ph:
+; CHECK-NEXT:    br label [[IF_ELSE:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[SUB:%.*]] = phi i32 [ [[I:%.*]], [[IF_ELSE_LR_PH]] ], [ [[SUB5_3:%.*]], [[IF_ELSE_3:%.*]] ]
+; CHECK-NEXT:    [[SUB5:%.*]] = sub i32 [[SUB]], [[J:%.*]]
+; CHECK-NEXT:    [[COND2:%.*]] = call zeroext i1 @check()
+; CHECK-NEXT:    br i1 [[COND2]], label [[IF_THEN_LOOPEXIT:%.*]], label [[IF_ELSE_1:%.*]]
+; CHECK:       if.then.loopexit:
+; CHECK-NEXT:    [[SUB5_LCSSA:%.*]] = phi i32 [ [[SUB5]], [[IF_ELSE]] ], [ [[SUB5_1:%.*]], [[IF_ELSE_1]] ], [ [[SUB5_2:%.*]], [[IF_ELSE_2:%.*]] ], [ [[SUB5_3]], [[IF_ELSE_3]] ]
+; CHECK-NEXT:    br label [[IF_THEN]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[I_TR:%.*]] = phi i32 [ [[I]], [[ENTRY:%.*]] ], [ [[SUB5_LCSSA]], [[IF_THEN_LOOPEXIT]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       if.else.1:
+; CHECK-NEXT:    [[SUB5_1]] = sub i32 [[SUB5]], [[J]]
+; CHECK-NEXT:    [[COND2_1:%.*]] = call zeroext i1 @check()
+; CHECK-NEXT:    br i1 [[COND2_1]], label [[IF_THEN_LOOPEXIT]], label [[IF_ELSE_2]]
+; CHECK:       if.else.2:
+; CHECK-NEXT:    [[SUB5_2]] = sub i32 [[SUB5_1]], [[J]]
+; CHECK-NEXT:    [[COND2_2:%.*]] = call zeroext i1 @check()
+; CHECK-NEXT:    br i1 [[COND2_2]], label [[IF_THEN_LOOPEXIT]], label [[IF_ELSE_3]]
+; CHECK:       if.else.3:
+; CHECK-NEXT:    [[SUB5_3]] = sub i32 [[SUB5_2]], [[J]]
+; CHECK-NEXT:    [[COND2_3:%.*]] = call zeroext i1 @check()
+; CHECK-NEXT:    br i1 [[COND2_3]], label [[IF_THEN_LOOPEXIT]], label [[IF_ELSE]], !llvm.loop [[LOOP0:![0-9]+]]
+;
 entry:
   %cond1 = call zeroext i1 @check()
   br i1 %cond1, label %if.then, label %if.else.lr.ph
@@ -30,11 +57,60 @@ if.then:                                          ; preds = %if.else, %entry
 
 ; PR7318: assertion failure after doing a simple loop unroll
 ;
-; CHECK-LABEL: @test2(
-; CHECK: bb1.bb2_crit_edge:
-; CHECK: %.lcssa = phi i32 [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ], [ %{{[2468]}}, %bb1{{.*}} ]
-; CHECK: bb1.3:
 define i32 @test2(i32* nocapture %p, i32 %n) nounwind readonly {
+;
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP0]], label [[BB_NPH:%.*]], label [[BB2:%.*]]
+; CHECK:       bb.nph:
+; CHECK-NEXT:    [[TMP:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    br label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH]] ], [ [[INDVAR_NEXT_3:%.*]], [[BB1_3:%.*]] ]
+; CHECK-NEXT:    [[S_01:%.*]] = phi i32 [ 0, [[BB_NPH]] ], [ [[TMP8:%.*]], [[BB1_3]] ]
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[SCEVGEP]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i32 [[TMP1]], [[S_01]]
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add nuw nsw i64 [[INDVAR]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVAR_NEXT]], [[TMP]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[BB_1:%.*]], label [[BB1_BB2_CRIT_EDGE:%.*]]
+; CHECK:       bb1.bb2_crit_edge:
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP2]], [[BB1]] ], [ [[TMP4:%.*]], [[BB1_1:%.*]] ], [ [[TMP6:%.*]], [[BB1_2:%.*]] ], [ [[TMP8]], [[BB1_3]] ]
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[S_0_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA]], [[BB1_BB2_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[S_0_LCSSA]]
+; CHECK:       bb.1:
+; CHECK-NEXT:    [[SCEVGEP_1:%.*]] = getelementptr i32, i32* [[P]], i64 [[INDVAR_NEXT]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[SCEVGEP_1]], align 1
+; CHECK-NEXT:    [[TMP4]] = add nsw i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    br label [[BB1_1]]
+; CHECK:       bb1.1:
+; CHECK-NEXT:    [[INDVAR_NEXT_1:%.*]] = add nuw nsw i64 [[INDVAR_NEXT]], 1
+; CHECK-NEXT:    [[EXITCOND_1:%.*]] = icmp ne i64 [[INDVAR_NEXT_1]], [[TMP]]
+; CHECK-NEXT:    br i1 [[EXITCOND_1]], label [[BB_2:%.*]], label [[BB1_BB2_CRIT_EDGE]]
+; CHECK:       bb.2:
+; CHECK-NEXT:    [[SCEVGEP_2:%.*]] = getelementptr i32, i32* [[P]], i64 [[INDVAR_NEXT_1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[SCEVGEP_2]], align 1
+; CHECK-NEXT:    [[TMP6]] = add nsw i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    br label [[BB1_2]]
+; CHECK:       bb1.2:
+; CHECK-NEXT:    [[INDVAR_NEXT_2:%.*]] = add nuw nsw i64 [[INDVAR_NEXT_1]], 1
+; CHECK-NEXT:    [[EXITCOND_2:%.*]] = icmp ne i64 [[INDVAR_NEXT_2]], [[TMP]]
+; CHECK-NEXT:    br i1 [[EXITCOND_2]], label [[BB_3:%.*]], label [[BB1_BB2_CRIT_EDGE]]
+; CHECK:       bb.3:
+; CHECK-NEXT:    [[SCEVGEP_3:%.*]] = getelementptr i32, i32* [[P]], i64 [[INDVAR_NEXT_2]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[SCEVGEP_3]], align 1
+; CHECK-NEXT:    [[TMP8]] = add nsw i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT:    br label [[BB1_3]]
+; CHECK:       bb1.3:
+; CHECK-NEXT:    [[INDVAR_NEXT_3]] = add i64 [[INDVAR_NEXT_2]], 1
+; CHECK-NEXT:    [[EXITCOND_3:%.*]] = icmp ne i64 [[INDVAR_NEXT_3]], [[TMP]]
+; CHECK-NEXT:    br i1 [[EXITCOND_3]], label [[BB]], label [[BB1_BB2_CRIT_EDGE]], !llvm.loop [[LOOP2:![0-9]+]]
+;
 entry:
   %0 = icmp sgt i32 %n, 0                         ; <i1> [#uses=1]
   br i1 %0, label %bb.nph, label %bb2
@@ -67,11 +143,67 @@ bb2:                                              ; preds = %bb1.bb2_crit_edge,
 
 ; Check phi update for loop with an early-exit.
 ;
-; CHECK-LABEL: @test3(
-; CHECK: return.loopexit:
-; CHECK: %tmp7.i.lcssa = phi i32 [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ], [ %tmp7.i{{.*}}, %land.lhs.true{{.*}} ]
-; CHECK: exit.3:
 define i32 @test3() nounwind uwtable ssp align 2 {
+;
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND1:%.*]] = call zeroext i1 @check()
+; CHECK-NEXT:    br i1 [[COND1]], label [[RETURN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[COND2:%.*]] = call zeroext i1 @check()
+; CHECK-NEXT:    br i1 [[COND2]], label [[EXIT:%.*]], label [[DO_COND:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[TMP7_I:%.*]] = load i32, i32* undef, align 8
+; CHECK-NEXT:    br i1 undef, label [[DO_COND]], label [[LAND_LHS_TRUE:%.*]]
+; CHECK:       land.lhs.true:
+; CHECK-NEXT:    br i1 true, label [[RETURN_LOOPEXIT:%.*]], label [[DO_COND]]
+; CHECK:       do.cond:
+; CHECK-NEXT:    [[COND3:%.*]] = call zeroext i1 @check()
+; CHECK-NEXT:    br i1 [[COND3]], label [[DO_END:%.*]], label [[DO_BODY_1:%.*]]
+; CHECK:       do.end:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return.loopexit:
+; CHECK-NEXT:    [[TMP7_I_LCSSA:%.*]] = phi i32 [ [[TMP7_I]], [[LAND_LHS_TRUE]] ], [ [[TMP7_I_1:%.*]], [[LAND_LHS_TRUE_1:%.*]] ], [ [[TMP7_I_2:%.*]], [[LAND_LHS_TRUE_2:%.*]] ], [ [[TMP7_I_3:%.*]], [[LAND_LHS_TRUE_3:%.*]] ]
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 0, [[DO_END]] ], [ 0, [[ENTRY:%.*]] ], [ [[TMP7_I_LCSSA]], [[RETURN_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+; CHECK:       do.body.1:
+; CHECK-NEXT:    [[COND2_1:%.*]] = call zeroext i1 @check()
+; CHECK-NEXT:    br i1 [[COND2_1]], label [[EXIT_1:%.*]], label [[DO_COND_1:%.*]]
+; CHECK:       exit.1:
+; CHECK-NEXT:    [[TMP7_I_1]] = load i32, i32* undef, align 8
+; CHECK-NEXT:    br i1 undef, label [[DO_COND_1]], label [[LAND_LHS_TRUE_1]]
+; CHECK:       land.lhs.true.1:
+; CHECK-NEXT:    br i1 true, label [[RETURN_LOOPEXIT]], label [[DO_COND_1]]
+; CHECK:       do.cond.1:
+; CHECK-NEXT:    [[COND3_1:%.*]] = call zeroext i1 @check()
+; CHECK-NEXT:    br i1 [[COND3_1]], label [[DO_END]], label [[DO_BODY_2:%.*]]
+; CHECK:       do.body.2:
+; CHECK-NEXT:    [[COND2_2:%.*]] = call zeroext i1 @check()
+; CHECK-NEXT:    br i1 [[COND2_2]], label [[EXIT_2:%.*]], label [[DO_COND_2:%.*]]
+; CHECK:       exit.2:
+; CHECK-NEXT:    [[TMP7_I_2]] = load i32, i32* undef, align 8
+; CHECK-NEXT:    br i1 undef, label [[DO_COND_2]], label [[LAND_LHS_TRUE_2]]
+; CHECK:       land.lhs.true.2:
+; CHECK-NEXT:    br i1 true, label [[RETURN_LOOPEXIT]], label [[DO_COND_2]]
+; CHECK:       do.cond.2:
+; CHECK-NEXT:    [[COND3_2:%.*]] = call zeroext i1 @check()
+; CHECK-NEXT:    br i1 [[COND3_2]], label [[DO_END]], label [[DO_BODY_3:%.*]]
+; CHECK:       do.body.3:
+; CHECK-NEXT:    [[COND2_3:%.*]] = call zeroext i1 @check()
+; CHECK-NEXT:    br i1 [[COND2_3]], label [[EXIT_3:%.*]], label [[DO_COND_3:%.*]]
+; CHECK:       exit.3:
+; CHECK-NEXT:    [[TMP7_I_3]] = load i32, i32* undef, align 8
+; CHECK-NEXT:    br i1 undef, label [[DO_COND_3]], label [[LAND_LHS_TRUE_3]]
+; CHECK:       land.lhs.true.3:
+; CHECK-NEXT:    br i1 true, label [[RETURN_LOOPEXIT]], label [[DO_COND_3]]
+; CHECK:       do.cond.3:
+; CHECK-NEXT:    [[COND3_3:%.*]] = call zeroext i1 @check()
+; CHECK-NEXT:    br i1 [[COND3_3]], label [[DO_END]], label [[DO_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+;
 entry:
   %cond1 = call zeroext i1 @check()
   br i1 %cond1, label %return, label %if.end

diff  --git a/llvm/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll b/llvm/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll
index 8344993a6fd2f..be4b6ff64fdde 100644
--- a/llvm/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll
+++ b/llvm/test/Transforms/LoopUnroll/2011-08-09-PhiUpdate.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S < %s -instcombine -inline -jump-threading -loop-unroll -unroll-count=4 | FileCheck %s
 ;
 ; This is a test case that required a number of setup passes because
@@ -12,12 +13,72 @@ declare i32 @getval() nounwind
 ; Check that the loop exit merges values from all the iterations. This
 ; could be a tad fragile, but it's a good test.
 ;
-; CHECK-LABEL: @foo(
-; CHECK: return:
-; CHECK: %retval.0 = phi i32 [ %tmp7.i, %land.lhs.true ], [ 0, %do.cond ], [ %tmp7.i.1, %land.lhs.true.1 ], [ 0, %do.cond.1 ], [ %tmp7.i.2, %land.lhs.true.2 ], [ 0, %do.cond.2 ], [ %tmp7.i.3, %land.lhs.true.3 ], [ 0, %do.cond.3 ]
-; CHECK-NOT: @bar(
-; CHECK: bar.exit.3
 define i32 @foo() uwtable ssp align 2 {
+;
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  if.end:
+; CHECK-NEXT:    [[CALL2:%.*]] = call i32 @getval()
+; CHECK-NEXT:    br label [[LAND_LHS_TRUE_I:%.*]]
+; CHECK:       land.lhs.true.i:
+; CHECK-NEXT:    [[CMP4_I:%.*]] = call zeroext i1 @check() #[[ATTR0:[0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP4_I]], label [[BAR_EXIT:%.*]], label [[DO_COND:%.*]]
+; CHECK:       bar.exit:
+; CHECK-NEXT:    [[TMP7_I:%.*]] = call i32 @getval() #[[ATTR0]]
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[TMP7_I]], 0
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[DO_COND]], label [[LAND_LHS_TRUE:%.*]]
+; CHECK:       land.lhs.true:
+; CHECK-NEXT:    [[CALL10:%.*]] = call i32 @getval()
+; CHECK-NEXT:    [[CMP11:%.*]] = icmp eq i32 [[CALL10]], 0
+; CHECK-NEXT:    br i1 [[CMP11]], label [[RETURN:%.*]], label [[DO_COND]]
+; CHECK:       do.cond:
+; CHECK-NEXT:    [[CMP18:%.*]] = icmp sgt i32 [[CALL2]], -1
+; CHECK-NEXT:    br i1 [[CMP18]], label [[LAND_LHS_TRUE_I_1:%.*]], label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[TMP7_I]], [[LAND_LHS_TRUE]] ], [ 0, [[DO_COND]] ], [ [[TMP7_I_1:%.*]], [[LAND_LHS_TRUE_1:%.*]] ], [ 0, [[DO_COND_1:%.*]] ], [ [[TMP7_I_2:%.*]], [[LAND_LHS_TRUE_2:%.*]] ], [ 0, [[DO_COND_2:%.*]] ], [ [[TMP7_I_3:%.*]], [[LAND_LHS_TRUE_3:%.*]] ], [ 0, [[DO_COND_3:%.*]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+; CHECK:       land.lhs.true.i.1:
+; CHECK-NEXT:    [[CMP4_I_1:%.*]] = call zeroext i1 @check() #[[ATTR0]]
+; CHECK-NEXT:    br i1 [[CMP4_I_1]], label [[BAR_EXIT_1:%.*]], label [[DO_COND_1]]
+; CHECK:       bar.exit.1:
+; CHECK-NEXT:    [[TMP7_I_1]] = call i32 @getval() #[[ATTR0]]
+; CHECK-NEXT:    [[CMP_NOT_1:%.*]] = icmp eq i32 [[TMP7_I_1]], 0
+; CHECK-NEXT:    br i1 [[CMP_NOT_1]], label [[DO_COND_1]], label [[LAND_LHS_TRUE_1]]
+; CHECK:       land.lhs.true.1:
+; CHECK-NEXT:    [[CALL10_1:%.*]] = call i32 @getval()
+; CHECK-NEXT:    [[CMP11_1:%.*]] = icmp eq i32 [[CALL10_1]], 0
+; CHECK-NEXT:    br i1 [[CMP11_1]], label [[RETURN]], label [[DO_COND_1]]
+; CHECK:       do.cond.1:
+; CHECK-NEXT:    [[CMP18_1:%.*]] = icmp sgt i32 [[CALL2]], -1
+; CHECK-NEXT:    br i1 [[CMP18_1]], label [[LAND_LHS_TRUE_I_2:%.*]], label [[RETURN]]
+; CHECK:       land.lhs.true.i.2:
+; CHECK-NEXT:    [[CMP4_I_2:%.*]] = call zeroext i1 @check() #[[ATTR0]]
+; CHECK-NEXT:    br i1 [[CMP4_I_2]], label [[BAR_EXIT_2:%.*]], label [[DO_COND_2]]
+; CHECK:       bar.exit.2:
+; CHECK-NEXT:    [[TMP7_I_2]] = call i32 @getval() #[[ATTR0]]
+; CHECK-NEXT:    [[CMP_NOT_2:%.*]] = icmp eq i32 [[TMP7_I_2]], 0
+; CHECK-NEXT:    br i1 [[CMP_NOT_2]], label [[DO_COND_2]], label [[LAND_LHS_TRUE_2]]
+; CHECK:       land.lhs.true.2:
+; CHECK-NEXT:    [[CALL10_2:%.*]] = call i32 @getval()
+; CHECK-NEXT:    [[CMP11_2:%.*]] = icmp eq i32 [[CALL10_2]], 0
+; CHECK-NEXT:    br i1 [[CMP11_2]], label [[RETURN]], label [[DO_COND_2]]
+; CHECK:       do.cond.2:
+; CHECK-NEXT:    [[CMP18_2:%.*]] = icmp sgt i32 [[CALL2]], -1
+; CHECK-NEXT:    br i1 [[CMP18_2]], label [[LAND_LHS_TRUE_I_3:%.*]], label [[RETURN]]
+; CHECK:       land.lhs.true.i.3:
+; CHECK-NEXT:    [[CMP4_I_3:%.*]] = call zeroext i1 @check() #[[ATTR0]]
+; CHECK-NEXT:    br i1 [[CMP4_I_3]], label [[BAR_EXIT_3:%.*]], label [[DO_COND_3]]
+; CHECK:       bar.exit.3:
+; CHECK-NEXT:    [[TMP7_I_3]] = call i32 @getval() #[[ATTR0]]
+; CHECK-NEXT:    [[CMP_NOT_3:%.*]] = icmp eq i32 [[TMP7_I_3]], 0
+; CHECK-NEXT:    br i1 [[CMP_NOT_3]], label [[DO_COND_3]], label [[LAND_LHS_TRUE_3]]
+; CHECK:       land.lhs.true.3:
+; CHECK-NEXT:    [[CALL10_3:%.*]] = call i32 @getval()
+; CHECK-NEXT:    [[CMP11_3:%.*]] = icmp eq i32 [[CALL10_3]], 0
+; CHECK-NEXT:    br i1 [[CMP11_3]], label [[RETURN]], label [[DO_COND_3]]
+; CHECK:       do.cond.3:
+; CHECK-NEXT:    [[CMP18_3:%.*]] = icmp sgt i32 [[CALL2]], -1
+; CHECK-NEXT:    br i1 [[CMP18_3]], label [[LAND_LHS_TRUE_I]], label [[RETURN]], !llvm.loop [[LOOP0:![0-9]+]]
+;
 entry:
   br i1 undef, label %return, label %if.end
 
@@ -45,6 +106,18 @@ return:                                           ; preds = %do.cond, %land.lhs.
 }
 
 define linkonce_odr i32 @bar() nounwind uwtable ssp align 2 {
+;
+; CHECK-LABEL: @bar(
+; CHECK-NEXT:  land.lhs.true:
+; CHECK-NEXT:    [[CMP4:%.*]] = call zeroext i1 @check()
+; CHECK-NEXT:    br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @getval()
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ 0, [[LAND_LHS_TRUE:%.*]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
 entry:
   br i1 undef, label %land.lhs.true, label %cond.end
 

diff  --git a/llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll b/llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll
index 7e8c55e7541cf..156c0ab106587 100644
--- a/llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll
+++ b/llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll
@@ -1,29 +1,151 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -mtriple=thumbv8m.main -mcpu=cortex-m33 -loop-unroll -S < %s -o - | FileCheck %s
 ; RUN: opt -mtriple=thumbv7em -mcpu=cortex-m7 -loop-unroll -S < %s -o - | FileCheck %s
 
-;CHECK-LABEL: test_three_blocks
-;CHECK: for.body.epil:
-;CHECK: if.then.epil:
-;CHECK: for.inc.epil:
-;CHECK: for.body:
-;CHECK: if.then:
-;CHECK: for.inc:
-;CHECK: for.body.epil.1:
-;CHECK: if.then.epil.1:
-;CHECK: for.inc.epil.1:
-;CHECK: for.body.epil.2:
-;CHECK: if.then.epil.2:
-;CHECK: for.inc.epil.2:
-;CHECK: if.then.1:
-;CHECK: for.inc.1:
-;CHECK: if.then.2:
-;CHECK: for.inc.2:
-;CHECK: if.then.3:
-;CHECK: for.inc.3:
 define void @test_three_blocks(i32* nocapture %Output,
-                               i32* nocapture readonly %Condition,
-                               i32* nocapture readonly %Input,
-                               i32 %MaxJ) {
+;
+; CHECK-LABEL: @test_three_blocks(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[MAXJ:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[MAXJ]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[MAXJ]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
+; CHECK:       for.body.preheader.new:
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[MAXJ]], [[XTRAITER]]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit.unr-lcssa.loopexit:
+; CHECK-NEXT:    [[TEMP_1_LCSSA_PH_PH:%.*]] = phi i32 [ [[TEMP_1_3:%.*]], [[FOR_INC_3:%.*]] ]
+; CHECK-NEXT:    [[J_010_UNR_PH:%.*]] = phi i32 [ [[INC_3:%.*]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    [[TEMP_09_UNR_PH:%.*]] = phi i32 [ [[TEMP_1_3]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]]
+; CHECK:       for.cond.cleanup.loopexit.unr-lcssa:
+; CHECK-NEXT:    [[TEMP_1_LCSSA_PH:%.*]] = phi i32 [ undef, [[FOR_BODY_PREHEADER]] ], [ [[TEMP_1_LCSSA_PH_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    [[J_010_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[J_010_UNR_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[TEMP_09_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TEMP_09_UNR_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
+; CHECK:       for.body.epil.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY_EPIL:%.*]]
+; CHECK:       for.body.epil:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[CONDITION:%.*]], i32 [[J_010_UNR]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT:    [[TOBOOL_EPIL:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_EPIL]], label [[FOR_INC_EPIL:%.*]], label [[IF_THEN_EPIL:%.*]]
+; CHECK:       if.then.epil:
+; CHECK-NEXT:    [[ARRAYIDX1_EPIL:%.*]] = getelementptr inbounds i32, i32* [[INPUT:%.*]], i32 [[J_010_UNR]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX1_EPIL]], align 4
+; CHECK-NEXT:    [[ADD_EPIL:%.*]] = add i32 [[TMP3]], [[TEMP_09_UNR]]
+; CHECK-NEXT:    br label [[FOR_INC_EPIL]]
+; CHECK:       for.inc.epil:
+; CHECK-NEXT:    [[TEMP_1_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], [[IF_THEN_EPIL]] ], [ [[TEMP_09_UNR]], [[FOR_BODY_EPIL]] ]
+; CHECK-NEXT:    [[INC_EPIL:%.*]] = add nuw i32 [[J_010_UNR]], 1
+; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
+; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_BODY_EPIL_1:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA:%.*]]
+; CHECK:       for.cond.cleanup.loopexit.epilog-lcssa:
+; CHECK-NEXT:    [[TEMP_1_LCSSA_PH1:%.*]] = phi i32 [ [[TEMP_1_EPIL]], [[FOR_INC_EPIL]] ], [ [[TEMP_1_EPIL_1:%.*]], [[FOR_INC_EPIL_1:%.*]] ], [ [[TEMP_1_EPIL_2:%.*]], [[FOR_INC_EPIL_2:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[TEMP_1_LCSSA:%.*]] = phi i32 [ [[TEMP_1_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[TEMP_1_LCSSA_PH1]], [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[TEMP_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TEMP_1_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    store i32 [[TEMP_0_LCSSA]], i32* [[OUTPUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[J_010:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_3]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    [[TEMP_09:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[TEMP_1_3]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[J_010]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[FOR_INC:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[J_010]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP5]], [[TEMP_09]]
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[TEMP_1:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[TEMP_09]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i32 [[J_010]], 1
+; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[TOBOOL_1:%.*]] = icmp eq i32 [[TMP6]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_1]], label [[FOR_INC_1:%.*]], label [[IF_THEN_1:%.*]]
+; CHECK:       for.body.epil.1:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4
+; CHECK-NEXT:    [[TOBOOL_EPIL_1:%.*]] = icmp eq i32 [[TMP7]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_EPIL_1]], label [[FOR_INC_EPIL_1]], label [[IF_THEN_EPIL_1:%.*]]
+; CHECK:       if.then.epil.1:
+; CHECK-NEXT:    [[ARRAYIDX1_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_EPIL]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX1_EPIL_1]], align 4
+; CHECK-NEXT:    [[ADD_EPIL_1:%.*]] = add i32 [[TMP8]], [[TEMP_1_EPIL]]
+; CHECK-NEXT:    br label [[FOR_INC_EPIL_1]]
+; CHECK:       for.inc.epil.1:
+; CHECK-NEXT:    [[TEMP_1_EPIL_1]] = phi i32 [ [[ADD_EPIL_1]], [[IF_THEN_EPIL_1]] ], [ [[TEMP_1_EPIL]], [[FOR_BODY_EPIL_1]] ]
+; CHECK-NEXT:    [[INC_EPIL_1:%.*]] = add nuw i32 [[INC_EPIL]], 1
+; CHECK-NEXT:    [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1
+; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label [[FOR_BODY_EPIL_2:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK:       for.body.epil.2:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL_1]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4
+; CHECK-NEXT:    [[TOBOOL_EPIL_2:%.*]] = icmp eq i32 [[TMP9]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_EPIL_2]], label [[FOR_INC_EPIL_2]], label [[IF_THEN_EPIL_2:%.*]]
+; CHECK:       if.then.epil.2:
+; CHECK-NEXT:    [[ARRAYIDX1_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_EPIL_1]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX1_EPIL_2]], align 4
+; CHECK-NEXT:    [[ADD_EPIL_2:%.*]] = add i32 [[TMP10]], [[TEMP_1_EPIL_1]]
+; CHECK-NEXT:    br label [[FOR_INC_EPIL_2]]
+; CHECK:       for.inc.epil.2:
+; CHECK-NEXT:    [[TEMP_1_EPIL_2]] = phi i32 [ [[ADD_EPIL_2]], [[IF_THEN_EPIL_2]] ], [ [[TEMP_1_EPIL_1]], [[FOR_BODY_EPIL_2]] ]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK:       if.then.1:
+; CHECK-NEXT:    [[ARRAYIDX1_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX1_1]], align 4
+; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[TMP11]], [[TEMP_1]]
+; CHECK-NEXT:    br label [[FOR_INC_1]]
+; CHECK:       for.inc.1:
+; CHECK-NEXT:    [[TEMP_1_1:%.*]] = phi i32 [ [[ADD_1]], [[IF_THEN_1]] ], [ [[TEMP_1]], [[FOR_INC]] ]
+; CHECK-NEXT:    [[INC_1:%.*]] = add nuw nsw i32 [[INC]], 1
+; CHECK-NEXT:    [[NITER_NSUB_1:%.*]] = sub i32 [[NITER_NSUB]], 1
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_1]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[TOBOOL_2:%.*]] = icmp eq i32 [[TMP12]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_2]], label [[FOR_INC_2:%.*]], label [[IF_THEN_2:%.*]]
+; CHECK:       if.then.2:
+; CHECK-NEXT:    [[ARRAYIDX1_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_1]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX1_2]], align 4
+; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[TMP13]], [[TEMP_1_1]]
+; CHECK-NEXT:    br label [[FOR_INC_2]]
+; CHECK:       for.inc.2:
+; CHECK-NEXT:    [[TEMP_1_2:%.*]] = phi i32 [ [[ADD_2]], [[IF_THEN_2]] ], [ [[TEMP_1_1]], [[FOR_INC_1]] ]
+; CHECK-NEXT:    [[INC_2:%.*]] = add nuw nsw i32 [[INC_1]], 1
+; CHECK-NEXT:    [[NITER_NSUB_2:%.*]] = sub i32 [[NITER_NSUB_1]], 1
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_2]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[TOBOOL_3:%.*]] = icmp eq i32 [[TMP14]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_3]], label [[FOR_INC_3]], label [[IF_THEN_3:%.*]]
+; CHECK:       if.then.3:
+; CHECK-NEXT:    [[ARRAYIDX1_3:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_2]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX1_3]], align 4
+; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 [[TMP15]], [[TEMP_1_2]]
+; CHECK-NEXT:    br label [[FOR_INC_3]]
+; CHECK:       for.inc.3:
+; CHECK-NEXT:    [[TEMP_1_3]] = phi i32 [ [[ADD_3]], [[IF_THEN_3]] ], [ [[TEMP_1_2]], [[FOR_INC_2]] ]
+; CHECK-NEXT:    [[INC_3]] = add nuw i32 [[INC_2]], 1
+; CHECK-NEXT:    [[NITER_NSUB_3]] = sub i32 [[NITER_NSUB_2]], 1
+; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NSUB_3]], 0
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]], label [[FOR_BODY]]
+;
+  i32* nocapture readonly %Condition,
+  i32* nocapture readonly %Input,
+  i32 %MaxJ) {
 entry:
   %cmp8 = icmp eq i32 %MaxJ, 0
   br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
@@ -57,21 +179,83 @@ for.inc:                                          ; preds = %for.body, %if.then
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-;CHECK-LABEL: test_two_exits
-;CHECK: for.body:
-;CHECK: if.end:
-;CHECK: cleanup.loopexit:
-;CHECK: cleanup:
-;CHECK: for.body.1:
-;CHECK: if.end.1:
-;CHECK: for.body.2:
-;CHECK: if.end.2:
-;CHECK: for.body.3:
-;CHECK: if.end.3:
 define void @test_two_exits(i32* nocapture %Output,
-                            i32* nocapture readonly %Condition,
-                            i32* nocapture readonly %Input,
-                            i32 %MaxJ) {
+;
+; CHECK-LABEL: @test_two_exits(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP14:%.*]] = icmp eq i32 [[MAXJ:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP14]], label [[CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[J_016:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INC_3:%.*]], [[IF_END_3:%.*]] ]
+; CHECK-NEXT:    [[TEMP_015:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TEMP_0_ADD_3:%.*]], [[IF_END_3]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[INPUT:%.*]], i32 [[J_016]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[TMP0]], 65535
+; CHECK-NEXT:    br i1 [[CMP1]], label [[CLEANUP_LOOPEXIT:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION:%.*]], i32 [[J_016]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[ADD:%.*]] = select i1 [[TOBOOL]], i32 0, i32 [[TMP0]]
+; CHECK-NEXT:    [[TEMP_0_ADD:%.*]] = add i32 [[ADD]], [[TEMP_015]]
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i32 [[J_016]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[INC]], [[MAXJ]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_1:%.*]], label [[CLEANUP_LOOPEXIT]]
+; CHECK:       cleanup.loopexit:
+; CHECK-NEXT:    [[TEMP_0_LCSSA_PH:%.*]] = phi i32 [ [[TEMP_0_ADD]], [[IF_END]] ], [ [[TEMP_015]], [[FOR_BODY]] ], [ [[TEMP_0_ADD]], [[FOR_BODY_1]] ], [ [[TEMP_0_ADD_1:%.*]], [[IF_END_1:%.*]] ], [ [[TEMP_0_ADD_1]], [[FOR_BODY_2:%.*]] ], [ [[TEMP_0_ADD_2:%.*]], [[IF_END_2:%.*]] ], [ [[TEMP_0_ADD_2]], [[FOR_BODY_3:%.*]] ], [ [[TEMP_0_ADD_3]], [[IF_END_3]] ]
+; CHECK-NEXT:    br label [[CLEANUP]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    [[TEMP_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TEMP_0_LCSSA_PH]], [[CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    store i32 [[TEMP_0_LCSSA]], i32* [[OUTPUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+; CHECK:       for.body.1:
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp ugt i32 [[TMP2]], 65535
+; CHECK-NEXT:    br i1 [[CMP1_1]], label [[CLEANUP_LOOPEXIT]], label [[IF_END_1]]
+; CHECK:       if.end.1:
+; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT:    [[TOBOOL_1:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT:    [[ADD_1:%.*]] = select i1 [[TOBOOL_1]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT:    [[TEMP_0_ADD_1]] = add i32 [[ADD_1]], [[TEMP_0_ADD]]
+; CHECK-NEXT:    [[INC_1:%.*]] = add nuw nsw i32 [[INC]], 1
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ult i32 [[INC_1]], [[MAXJ]]
+; CHECK-NEXT:    br i1 [[CMP_1]], label [[FOR_BODY_2]], label [[CLEANUP_LOOPEXIT]]
+; CHECK:       for.body.2:
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[CMP1_2:%.*]] = icmp ugt i32 [[TMP4]], 65535
+; CHECK-NEXT:    br i1 [[CMP1_2]], label [[CLEANUP_LOOPEXIT]], label [[IF_END_2]]
+; CHECK:       if.end.2:
+; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT:    [[TOBOOL_2:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    [[ADD_2:%.*]] = select i1 [[TOBOOL_2]], i32 0, i32 [[TMP4]]
+; CHECK-NEXT:    [[TEMP_0_ADD_2]] = add i32 [[ADD_2]], [[TEMP_0_ADD_1]]
+; CHECK-NEXT:    [[INC_2:%.*]] = add nuw nsw i32 [[INC_1]], 1
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp ult i32 [[INC_2]], [[MAXJ]]
+; CHECK-NEXT:    br i1 [[CMP_2]], label [[FOR_BODY_3]], label [[CLEANUP_LOOPEXIT]]
+; CHECK:       for.body.3:
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_2]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[CMP1_3:%.*]] = icmp ugt i32 [[TMP6]], 65535
+; CHECK-NEXT:    br i1 [[CMP1_3]], label [[CLEANUP_LOOPEXIT]], label [[IF_END_3]]
+; CHECK:       if.end.3:
+; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_2]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT:    [[TOBOOL_3:%.*]] = icmp eq i32 [[TMP7]], 0
+; CHECK-NEXT:    [[ADD_3:%.*]] = select i1 [[TOBOOL_3]], i32 0, i32 [[TMP6]]
+; CHECK-NEXT:    [[TEMP_0_ADD_3]] = add i32 [[ADD_3]], [[TEMP_0_ADD_2]]
+; CHECK-NEXT:    [[INC_3]] = add nuw i32 [[INC_2]], 1
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp ult i32 [[INC_3]], [[MAXJ]]
+; CHECK-NEXT:    br i1 [[CMP_3]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT]]
+;
+  i32* nocapture readonly %Condition,
+  i32* nocapture readonly %Input,
+  i32 %MaxJ) {
 entry:
   %cmp14 = icmp eq i32 %MaxJ, 0
   br i1 %cmp14, label %cleanup, label %for.body.preheader
@@ -103,16 +287,44 @@ cleanup:                                          ; preds = %if.end, %for.body,
   ret void
 }
 
-;CHECK-LABEL: test_three_exits
-;CHECK-NOT: for.body.epil
-;CHECK-NOT: if.end.epil
-;CHECK-LABEL: for.body
-;CHECK-LABEL: if.end
-;CHECK-LABEL: if.end5
 define void @test_three_exits(i32* nocapture %Output,
-                              i32* nocapture readonly %Condition,
-                              i32* nocapture readonly %Input,
-                              i32 %MaxJ) {
+;
+; CHECK-LABEL: @test_three_exits(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP20:%.*]] = icmp eq i32 [[MAXJ:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP20]], label [[CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[J_022:%.*]] = phi i32 [ [[INC:%.*]], [[IF_END5:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[TEMP_021:%.*]] = phi i32 [ [[TEMP_0_ADD:%.*]], [[IF_END5]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[CONDITION:%.*]], i32 [[J_022]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[TMP0]], 65535
+; CHECK-NEXT:    br i1 [[CMP1]], label [[CLEANUP_LOOPEXIT:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[INPUT:%.*]], i32 [[J_022]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp ugt i32 [[TMP1]], 65535
+; CHECK-NEXT:    br i1 [[CMP3]], label [[CLEANUP_LOOPEXIT]], label [[IF_END5]]
+; CHECK:       if.end5:
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    [[ADD:%.*]] = select i1 [[TOBOOL]], i32 0, i32 [[TMP1]]
+; CHECK-NEXT:    [[TEMP_0_ADD]] = add i32 [[ADD]], [[TEMP_021]]
+; CHECK-NEXT:    [[INC]] = add nuw i32 [[J_022]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[INC]], [[MAXJ]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT]]
+; CHECK:       cleanup.loopexit:
+; CHECK-NEXT:    [[TEMP_0_LCSSA_PH:%.*]] = phi i32 [ [[TEMP_0_ADD]], [[IF_END5]] ], [ [[TEMP_021]], [[FOR_BODY]] ], [ [[TEMP_021]], [[IF_END]] ]
+; CHECK-NEXT:    br label [[CLEANUP]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    [[TEMP_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TEMP_0_LCSSA_PH]], [[CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    store i32 [[TEMP_0_LCSSA]], i32* [[OUTPUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  i32* nocapture readonly %Condition,
+  i32* nocapture readonly %Input,
+  i32 %MaxJ) {
 entry:
   %cmp20 = icmp eq i32 %MaxJ, 0
   br i1 %cmp20, label %cleanup, label %for.body.preheader
@@ -148,35 +360,225 @@ cleanup:                                          ; preds = %if.end5, %for.body,
   ret void
 }
 
-;CHECK-LABEL: test_four_blocks
-;CHECK: for.body.epil:
-;CHECK: if.else.epil:
-;CHECK: if.then.epil:
-;CHECK: for.cond.cleanup:
-;CHECK: for.body:
-;CHECK: if.then:
-;CHECK: for.inc:
-;CHECK: for.body.epil.1:
-;CHECK: if.else.epil.1:
-;CHECK: if.then.epil.1:
-;CHECK: for.inc.epil.1:
-;CHECK: for.body.epil.2:
-;CHECK: if.else.epil.2:
-;CHECK: if.then.epil.2:
-;CHECK: for.inc.epil.2:
-;CHECK: if.else.1:
-;CHECK: if.then.1:
-;CHECK: for.inc.1:
-;CHECK: if.else.2:
-;CHECK: if.then.2:
-;CHECK: for.inc.2:
-;CHECK: if.else.3:
-;CHECK: if.then.3:
-;CHECK: for.inc.3:
 define void @test_four_blocks(i32* nocapture %Output,
-                              i32* nocapture readonly %Condition,
-                              i32* nocapture readonly %Input,
-                              i32 %MaxJ) {
+;
+; CHECK-LABEL: @test_four_blocks(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP25:%.*]] = icmp ugt i32 [[MAXJ:%.*]], 1
+; CHECK-NEXT:    br i1 [[CMP25]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[INPUT:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[MAXJ]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[MAXJ]], -2
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 3
+; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]]
+; CHECK:       for.body.lr.ph.new:
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[TMP0]], [[XTRAITER]]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit.unr-lcssa.loopexit:
+; CHECK-NEXT:    [[TEMP_1_LCSSA_PH_PH:%.*]] = phi i32 [ [[TEMP_1_3:%.*]], [[FOR_INC_3:%.*]] ]
+; CHECK-NEXT:    [[DOTUNR_PH:%.*]] = phi i32 [ [[TMP23:%.*]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    [[J_027_UNR_PH:%.*]] = phi i32 [ [[INC_3:%.*]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    [[TEMP_026_UNR_PH:%.*]] = phi i32 [ [[TEMP_1_3]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]]
+; CHECK:       for.cond.cleanup.loopexit.unr-lcssa:
+; CHECK-NEXT:    [[TEMP_1_LCSSA_PH:%.*]] = phi i32 [ undef, [[FOR_BODY_LR_PH]] ], [ [[TEMP_1_LCSSA_PH_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    [[DOTUNR:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_BODY_LR_PH]] ], [ [[DOTUNR_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[J_027_UNR:%.*]] = phi i32 [ 1, [[FOR_BODY_LR_PH]] ], [ [[J_027_UNR_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[TEMP_026_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[TEMP_026_UNR_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
+; CHECK:       for.body.epil.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY_EPIL:%.*]]
+; CHECK:       for.body.epil:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[CONDITION:%.*]], i32 [[J_027_UNR]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT:    [[CMP1_EPIL:%.*]] = icmp ugt i32 [[TMP3]], 65535
+; CHECK-NEXT:    [[ARRAYIDX2_EPIL:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[J_027_UNR]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL]], align 4
+; CHECK-NEXT:    [[CMP4_EPIL:%.*]] = icmp ugt i32 [[TMP4]], [[DOTUNR]]
+; CHECK-NEXT:    br i1 [[CMP1_EPIL]], label [[IF_THEN_EPIL:%.*]], label [[IF_ELSE_EPIL:%.*]]
+; CHECK:       if.else.epil:
+; CHECK-NEXT:    [[NOT_CMP4_EPIL:%.*]] = xor i1 [[CMP4_EPIL]], true
+; CHECK-NEXT:    [[SUB_EPIL:%.*]] = sext i1 [[NOT_CMP4_EPIL]] to i32
+; CHECK-NEXT:    [[SUB10_SINK_EPIL:%.*]] = add i32 [[J_027_UNR]], [[SUB_EPIL]]
+; CHECK-NEXT:    [[ARRAYIDX11_EPIL:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[SUB10_SINK_EPIL]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX11_EPIL]], align 4
+; CHECK-NEXT:    [[SUB13_EPIL:%.*]] = sub i32 [[TEMP_026_UNR]], [[TMP5]]
+; CHECK-NEXT:    br label [[FOR_INC_EPIL:%.*]]
+; CHECK:       if.then.epil:
+; CHECK-NEXT:    [[COND_EPIL:%.*]] = zext i1 [[CMP4_EPIL]] to i32
+; CHECK-NEXT:    [[ADD_EPIL:%.*]] = add i32 [[TEMP_026_UNR]], [[COND_EPIL]]
+; CHECK-NEXT:    br label [[FOR_INC_EPIL]]
+; CHECK:       for.inc.epil:
+; CHECK-NEXT:    [[TEMP_1_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], [[IF_THEN_EPIL]] ], [ [[SUB13_EPIL]], [[IF_ELSE_EPIL]] ]
+; CHECK-NEXT:    [[INC_EPIL:%.*]] = add nuw i32 [[J_027_UNR]], 1
+; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
+; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_BODY_EPIL_1:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA:%.*]]
+; CHECK:       for.cond.cleanup.loopexit.epilog-lcssa:
+; CHECK-NEXT:    [[TEMP_1_LCSSA_PH1:%.*]] = phi i32 [ [[TEMP_1_EPIL]], [[FOR_INC_EPIL]] ], [ [[TEMP_1_EPIL_1:%.*]], [[FOR_INC_EPIL_1:%.*]] ], [ [[TEMP_1_EPIL_2:%.*]], [[FOR_INC_EPIL_2:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[TEMP_1_LCSSA:%.*]] = phi i32 [ [[TEMP_1_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[TEMP_1_LCSSA_PH1]], [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[TEMP_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TEMP_1_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    store i32 [[TEMP_0_LCSSA]], i32* [[OUTPUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[TMP6:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_BODY_LR_PH_NEW]] ], [ [[TMP23]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    [[J_027:%.*]] = phi i32 [ 1, [[FOR_BODY_LR_PH_NEW]] ], [ [[INC_3]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    [[TEMP_026:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[TEMP_1_3]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_INC_3]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[J_027]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[TMP7]], 65535
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[J_027]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[CMP4:%.*]] = icmp ugt i32 [[TMP8]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[COND:%.*]] = zext i1 [[CMP4]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TEMP_026]], [[COND]]
+; CHECK-NEXT:    br label [[FOR_INC:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[NOT_CMP4:%.*]] = xor i1 [[CMP4]], true
+; CHECK-NEXT:    [[SUB:%.*]] = sext i1 [[NOT_CMP4]] to i32
+; CHECK-NEXT:    [[SUB10_SINK:%.*]] = add i32 [[J_027]], [[SUB]]
+; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[SUB10_SINK]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX11]], align 4
+; CHECK-NEXT:    [[SUB13:%.*]] = sub i32 [[TEMP_026]], [[TMP9]]
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[TEMP_1:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[SUB13]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i32 [[J_027]], 1
+; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp ugt i32 [[TMP10]], 65535
+; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT:    [[CMP4_1:%.*]] = icmp ugt i32 [[TMP11]], [[TMP8]]
+; CHECK-NEXT:    br i1 [[CMP1_1]], label [[IF_THEN_1:%.*]], label [[IF_ELSE_1:%.*]]
+; CHECK:       for.body.epil.1:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4
+; CHECK-NEXT:    [[CMP1_EPIL_1:%.*]] = icmp ugt i32 [[TMP12]], 65535
+; CHECK-NEXT:    [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_EPIL]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_1]], align 4
+; CHECK-NEXT:    [[CMP4_EPIL_1:%.*]] = icmp ugt i32 [[TMP13]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[CMP1_EPIL_1]], label [[IF_THEN_EPIL_1:%.*]], label [[IF_ELSE_EPIL_1:%.*]]
+; CHECK:       if.else.epil.1:
+; CHECK-NEXT:    [[NOT_CMP4_EPIL_1:%.*]] = xor i1 [[CMP4_EPIL_1]], true
+; CHECK-NEXT:    [[SUB_EPIL_1:%.*]] = sext i1 [[NOT_CMP4_EPIL_1]] to i32
+; CHECK-NEXT:    [[SUB10_SINK_EPIL_1:%.*]] = add i32 [[INC_EPIL]], [[SUB_EPIL_1]]
+; CHECK-NEXT:    [[ARRAYIDX11_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[SUB10_SINK_EPIL_1]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX11_EPIL_1]], align 4
+; CHECK-NEXT:    [[SUB13_EPIL_1:%.*]] = sub i32 [[TEMP_1_EPIL]], [[TMP14]]
+; CHECK-NEXT:    br label [[FOR_INC_EPIL_1]]
+; CHECK:       if.then.epil.1:
+; CHECK-NEXT:    [[COND_EPIL_1:%.*]] = zext i1 [[CMP4_EPIL_1]] to i32
+; CHECK-NEXT:    [[ADD_EPIL_1:%.*]] = add i32 [[TEMP_1_EPIL]], [[COND_EPIL_1]]
+; CHECK-NEXT:    br label [[FOR_INC_EPIL_1]]
+; CHECK:       for.inc.epil.1:
+; CHECK-NEXT:    [[TEMP_1_EPIL_1]] = phi i32 [ [[ADD_EPIL_1]], [[IF_THEN_EPIL_1]] ], [ [[SUB13_EPIL_1]], [[IF_ELSE_EPIL_1]] ]
+; CHECK-NEXT:    [[INC_EPIL_1:%.*]] = add nuw i32 [[INC_EPIL]], 1
+; CHECK-NEXT:    [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1
+; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label [[FOR_BODY_EPIL_2:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK:       for.body.epil.2:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL_1]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4
+; CHECK-NEXT:    [[CMP1_EPIL_2:%.*]] = icmp ugt i32 [[TMP15]], 65535
+; CHECK-NEXT:    [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_EPIL_1]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_2]], align 4
+; CHECK-NEXT:    [[CMP4_EPIL_2:%.*]] = icmp ugt i32 [[TMP16]], [[TMP13]]
+; CHECK-NEXT:    br i1 [[CMP1_EPIL_2]], label [[IF_THEN_EPIL_2:%.*]], label [[IF_ELSE_EPIL_2:%.*]]
+; CHECK:       if.else.epil.2:
+; CHECK-NEXT:    [[NOT_CMP4_EPIL_2:%.*]] = xor i1 [[CMP4_EPIL_2]], true
+; CHECK-NEXT:    [[SUB_EPIL_2:%.*]] = sext i1 [[NOT_CMP4_EPIL_2]] to i32
+; CHECK-NEXT:    [[SUB10_SINK_EPIL_2:%.*]] = add i32 [[INC_EPIL_1]], [[SUB_EPIL_2]]
+; CHECK-NEXT:    [[ARRAYIDX11_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[SUB10_SINK_EPIL_2]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX11_EPIL_2]], align 4
+; CHECK-NEXT:    [[SUB13_EPIL_2:%.*]] = sub i32 [[TEMP_1_EPIL_1]], [[TMP17]]
+; CHECK-NEXT:    br label [[FOR_INC_EPIL_2]]
+; CHECK:       if.then.epil.2:
+; CHECK-NEXT:    [[COND_EPIL_2:%.*]] = zext i1 [[CMP4_EPIL_2]] to i32
+; CHECK-NEXT:    [[ADD_EPIL_2:%.*]] = add i32 [[TEMP_1_EPIL_1]], [[COND_EPIL_2]]
+; CHECK-NEXT:    br label [[FOR_INC_EPIL_2]]
+; CHECK:       for.inc.epil.2:
+; CHECK-NEXT:    [[TEMP_1_EPIL_2]] = phi i32 [ [[ADD_EPIL_2]], [[IF_THEN_EPIL_2]] ], [ [[SUB13_EPIL_2]], [[IF_ELSE_EPIL_2]] ]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK:       if.else.1:
+; CHECK-NEXT:    [[NOT_CMP4_1:%.*]] = xor i1 [[CMP4_1]], true
+; CHECK-NEXT:    [[SUB_1:%.*]] = sext i1 [[NOT_CMP4_1]] to i32
+; CHECK-NEXT:    [[SUB10_SINK_1:%.*]] = add i32 [[INC]], [[SUB_1]]
+; CHECK-NEXT:    [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[SUB10_SINK_1]]
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX11_1]], align 4
+; CHECK-NEXT:    [[SUB13_1:%.*]] = sub i32 [[TEMP_1]], [[TMP18]]
+; CHECK-NEXT:    br label [[FOR_INC_1:%.*]]
+; CHECK:       if.then.1:
+; CHECK-NEXT:    [[COND_1:%.*]] = zext i1 [[CMP4_1]] to i32
+; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[TEMP_1]], [[COND_1]]
+; CHECK-NEXT:    br label [[FOR_INC_1]]
+; CHECK:       for.inc.1:
+; CHECK-NEXT:    [[TEMP_1_1:%.*]] = phi i32 [ [[ADD_1]], [[IF_THEN_1]] ], [ [[SUB13_1]], [[IF_ELSE_1]] ]
+; CHECK-NEXT:    [[INC_1:%.*]] = add nuw nsw i32 [[INC]], 1
+; CHECK-NEXT:    [[NITER_NSUB_1:%.*]] = sub i32 [[NITER_NSUB]], 1
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_1]]
+; CHECK-NEXT:    [[TMP19:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[CMP1_2:%.*]] = icmp ugt i32 [[TMP19]], 65535
+; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_1]]
+; CHECK-NEXT:    [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT:    [[CMP4_2:%.*]] = icmp ugt i32 [[TMP20]], [[TMP11]]
+; CHECK-NEXT:    br i1 [[CMP1_2]], label [[IF_THEN_2:%.*]], label [[IF_ELSE_2:%.*]]
+; CHECK:       if.else.2:
+; CHECK-NEXT:    [[NOT_CMP4_2:%.*]] = xor i1 [[CMP4_2]], true
+; CHECK-NEXT:    [[SUB_2:%.*]] = sext i1 [[NOT_CMP4_2]] to i32
+; CHECK-NEXT:    [[SUB10_SINK_2:%.*]] = add i32 [[INC_1]], [[SUB_2]]
+; CHECK-NEXT:    [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[SUB10_SINK_2]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[ARRAYIDX11_2]], align 4
+; CHECK-NEXT:    [[SUB13_2:%.*]] = sub i32 [[TEMP_1_1]], [[TMP21]]
+; CHECK-NEXT:    br label [[FOR_INC_2:%.*]]
+; CHECK:       if.then.2:
+; CHECK-NEXT:    [[COND_2:%.*]] = zext i1 [[CMP4_2]] to i32
+; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[TEMP_1_1]], [[COND_2]]
+; CHECK-NEXT:    br label [[FOR_INC_2]]
+; CHECK:       for.inc.2:
+; CHECK-NEXT:    [[TEMP_1_2:%.*]] = phi i32 [ [[ADD_2]], [[IF_THEN_2]] ], [ [[SUB13_2]], [[IF_ELSE_2]] ]
+; CHECK-NEXT:    [[INC_2:%.*]] = add nuw i32 [[INC_1]], 1
+; CHECK-NEXT:    [[NITER_NSUB_2:%.*]] = sub i32 [[NITER_NSUB_1]], 1
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_2]]
+; CHECK-NEXT:    [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[CMP1_3:%.*]] = icmp ugt i32 [[TMP22]], 65535
+; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_2]]
+; CHECK-NEXT:    [[TMP23]] = load i32, i32* [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT:    [[CMP4_3:%.*]] = icmp ugt i32 [[TMP23]], [[TMP20]]
+; CHECK-NEXT:    br i1 [[CMP1_3]], label [[IF_THEN_3:%.*]], label [[IF_ELSE_3:%.*]]
+; CHECK:       if.else.3:
+; CHECK-NEXT:    [[NOT_CMP4_3:%.*]] = xor i1 [[CMP4_3]], true
+; CHECK-NEXT:    [[SUB_3:%.*]] = sext i1 [[NOT_CMP4_3]] to i32
+; CHECK-NEXT:    [[SUB10_SINK_3:%.*]] = add i32 [[INC_2]], [[SUB_3]]
+; CHECK-NEXT:    [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[SUB10_SINK_3]]
+; CHECK-NEXT:    [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX11_3]], align 4
+; CHECK-NEXT:    [[SUB13_3:%.*]] = sub i32 [[TEMP_1_2]], [[TMP24]]
+; CHECK-NEXT:    br label [[FOR_INC_3]]
+; CHECK:       if.then.3:
+; CHECK-NEXT:    [[COND_3:%.*]] = zext i1 [[CMP4_3]] to i32
+; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 [[TEMP_1_2]], [[COND_3]]
+; CHECK-NEXT:    br label [[FOR_INC_3]]
+; CHECK:       for.inc.3:
+; CHECK-NEXT:    [[TEMP_1_3]] = phi i32 [ [[ADD_3]], [[IF_THEN_3]] ], [ [[SUB13_3]], [[IF_ELSE_3]] ]
+; CHECK-NEXT:    [[INC_3]] = add nuw nsw i32 [[INC_2]], 1
+; CHECK-NEXT:    [[NITER_NSUB_3]] = sub i32 [[NITER_NSUB_2]], 1
+; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NSUB_3]], 0
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]], label [[FOR_BODY]]
+;
+  i32* nocapture readonly %Condition,
+  i32* nocapture readonly %Input,
+  i32 %MaxJ) {
 entry:
   %cmp25 = icmp ugt i32 %MaxJ, 1
   br i1 %cmp25, label %for.body.lr.ph, label %for.cond.cleanup
@@ -223,17 +625,54 @@ for.inc:                                          ; preds = %if.then, %if.else
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-;CHECK-LABEL: test_five_blocks
-;CHECK-NOT: for.body.epil:
-;CHECK: for.body:
-;CHECK: if.end:
-;CHECK: if.else:
-;CHECK: for.inc:
-;CHECK-NOT: for.inc.1:
 define void @test_five_blocks(i32* nocapture %Output,
-                              i32* nocapture readonly %Condition,
-                              i32* nocapture readonly %Input,
-                              i32 %MaxJ) {
+;
+; CHECK-LABEL: @test_five_blocks(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP24:%.*]] = icmp ugt i32 [[MAXJ:%.*]], 1
+; CHECK-NEXT:    br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[J_026:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 1, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[TEMP_025:%.*]] = phi i32 [ [[TEMP_1:%.*]], [[FOR_INC]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[INPUT:%.*]], i32 [[J_026]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP0]], [[TEMP_025]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[ADD]], 16777215
+; CHECK-NEXT:    br i1 [[CMP1]], label [[CLEANUP_LOOPEXIT:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION:%.*]], i32 [[J_026]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp ugt i32 [[TMP1]], 65535
+; CHECK-NEXT:    br i1 [[CMP3]], label [[IF_THEN4:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then4:
+; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[J_026]], -1
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[SUB]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[CMP7:%.*]] = icmp ugt i32 [[TMP0]], [[TMP2]]
+; CHECK-NEXT:    [[COND:%.*]] = zext i1 [[CMP7]] to i32
+; CHECK-NEXT:    [[ADD8:%.*]] = add i32 [[ADD]], [[COND]]
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ADD]], [[TMP0]]
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[TEMP_1]] = phi i32 [ [[ADD8]], [[IF_THEN4]] ], [ [[AND]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[INC]] = add nuw i32 [[J_026]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[INC]], [[MAXJ]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT]]
+; CHECK:       cleanup.loopexit:
+; CHECK-NEXT:    [[TEMP_2_PH:%.*]] = phi i32 [ [[TEMP_1]], [[FOR_INC]] ], [ [[ADD]], [[FOR_BODY]] ]
+; CHECK-NEXT:    br label [[CLEANUP]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    [[TEMP_2:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TEMP_2_PH]], [[CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    store i32 [[TEMP_2]], i32* [[OUTPUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  i32* nocapture readonly %Condition,
+  i32* nocapture readonly %Input,
+  i32 %MaxJ) {
 entry:
   %cmp24 = icmp ugt i32 %MaxJ, 1
   br i1 %cmp24, label %for.body.preheader, label %cleanup
@@ -281,14 +720,69 @@ cleanup:                                          ; preds = %for.inc, %for.body,
   ret void
 }
 
-;CHECK-LABEL: iterate_inc
-;CHECK: while.body:
-;CHECK: while.end:
-;CHECK: while.body.1:
-;CHECK: while.body.2:
-;CHECK: while.body.3:
 %struct.Node = type { %struct.Node*, i32 }
 define void @iterate_inc(%struct.Node* %n, i32 %limit) {
+;
+; CHECK-LABEL: @iterate_inc(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL5:%.*]] = icmp eq %struct.Node* [[N:%.*]], null
+; CHECK-NEXT:    br i1 [[TOBOOL5]], label [[WHILE_END:%.*]], label [[LAND_RHS_PREHEADER:%.*]]
+; CHECK:       land.rhs.preheader:
+; CHECK-NEXT:    br label [[LAND_RHS:%.*]]
+; CHECK:       land.rhs:
+; CHECK-NEXT:    [[LIST_ADDR_06:%.*]] = phi %struct.Node* [ [[N]], [[LAND_RHS_PREHEADER]] ], [ [[TMP11:%.*]], [[WHILE_BODY_3:%.*]] ]
+; CHECK-NEXT:    [[VAL:%.*]] = getelementptr inbounds [[STRUCT_NODE:%.*]], %struct.Node* [[LIST_ADDR_06]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[VAL]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[LIMIT:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_BODY:%.*]], label [[WHILE_END_LOOPEXIT:%.*]]
+; CHECK:       while.body:
+; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT:    store i32 [[INC]], i32* [[VAL]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast %struct.Node* [[LIST_ADDR_06]] to %struct.Node**
+; CHECK-NEXT:    [[TMP2:%.*]] = load %struct.Node*, %struct.Node** [[TMP1]], align 4
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq %struct.Node* [[TMP2]], null
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT]], label [[LAND_RHS_1:%.*]]
+; CHECK:       while.end.loopexit:
+; CHECK-NEXT:    br label [[WHILE_END]]
+; CHECK:       while.end:
+; CHECK-NEXT:    ret void
+; CHECK:       land.rhs.1:
+; CHECK-NEXT:    [[VAL_1:%.*]] = getelementptr inbounds [[STRUCT_NODE]], %struct.Node* [[TMP2]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[VAL_1]], align 4
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp slt i32 [[TMP3]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[CMP_1]], label [[WHILE_BODY_1:%.*]], label [[WHILE_END_LOOPEXIT]]
+; CHECK:       while.body.1:
+; CHECK-NEXT:    [[INC_1:%.*]] = add nsw i32 [[TMP3]], 1
+; CHECK-NEXT:    store i32 [[INC_1]], i32* [[VAL_1]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast %struct.Node* [[TMP2]] to %struct.Node**
+; CHECK-NEXT:    [[TMP5:%.*]] = load %struct.Node*, %struct.Node** [[TMP4]], align 4
+; CHECK-NEXT:    [[TOBOOL_1:%.*]] = icmp eq %struct.Node* [[TMP5]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_1]], label [[WHILE_END_LOOPEXIT]], label [[LAND_RHS_2:%.*]]
+; CHECK:       land.rhs.2:
+; CHECK-NEXT:    [[VAL_2:%.*]] = getelementptr inbounds [[STRUCT_NODE]], %struct.Node* [[TMP5]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[VAL_2]], align 4
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp slt i32 [[TMP6]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[CMP_2]], label [[WHILE_BODY_2:%.*]], label [[WHILE_END_LOOPEXIT]]
+; CHECK:       while.body.2:
+; CHECK-NEXT:    [[INC_2:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK-NEXT:    store i32 [[INC_2]], i32* [[VAL_2]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast %struct.Node* [[TMP5]] to %struct.Node**
+; CHECK-NEXT:    [[TMP8:%.*]] = load %struct.Node*, %struct.Node** [[TMP7]], align 4
+; CHECK-NEXT:    [[TOBOOL_2:%.*]] = icmp eq %struct.Node* [[TMP8]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_2]], label [[WHILE_END_LOOPEXIT]], label [[LAND_RHS_3:%.*]]
+; CHECK:       land.rhs.3:
+; CHECK-NEXT:    [[VAL_3:%.*]] = getelementptr inbounds [[STRUCT_NODE]], %struct.Node* [[TMP8]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[VAL_3]], align 4
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp slt i32 [[TMP9]], [[LIMIT]]
+; CHECK-NEXT:    br i1 [[CMP_3]], label [[WHILE_BODY_3]], label [[WHILE_END_LOOPEXIT]]
+; CHECK:       while.body.3:
+; CHECK-NEXT:    [[INC_3:%.*]] = add nsw i32 [[TMP9]], 1
+; CHECK-NEXT:    store i32 [[INC_3]], i32* [[VAL_3]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast %struct.Node* [[TMP8]] to %struct.Node**
+; CHECK-NEXT:    [[TMP11]] = load %struct.Node*, %struct.Node** [[TMP10]], align 4
+; CHECK-NEXT:    [[TOBOOL_3:%.*]] = icmp eq %struct.Node* [[TMP11]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_3]], label [[WHILE_END_LOOPEXIT]], label [[LAND_RHS]]
+;
 entry:
   %tobool5 = icmp eq %struct.Node* %n, null
   br i1 %tobool5, label %while.end, label %land.rhs.preheader

diff  --git a/llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
index caa59da17490c..3160517155844 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
@@ -1,17 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -loop-unroll < %s | FileCheck %s
 ; RUN: opt -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' < %s | FileCheck %s
 
 ; Unroll twice, with first loop exit kept
-; CHECK-LABEL: @s32_max1
-; CHECK: do.body:
-; CHECK:  store
-; CHECK:  br i1 %cmp, label %do.body.1, label %do.end
-; CHECK: do.end:
-; CHECK:  ret void
-; CHECK: do.body.1:
-; CHECK:  store
-; CHECK:  br label %do.end
 define void @s32_max1(i32 %n, i32* %p) {
+;
+; CHECK-LABEL: @s32_max1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[N:%.*]], 1
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[N]]
+; CHECK-NEXT:    store i32 [[N]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INC:%.*]] = add i32 [[N]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[N]], [[ADD]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY_1:%.*]], label [[DO_END:%.*]]
+; CHECK:       do.end:
+; CHECK-NEXT:    ret void
+; CHECK:       do.body.1:
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC]]
+; CHECK-NEXT:    store i32 [[INC]], i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    br label [[DO_END]]
+;
 entry:
   %add = add i32 %n, 1
   br label %do.body
@@ -29,17 +39,28 @@ do.end:
 }
 
 ; Unroll thrice, with first loop exit kept
-; CHECK-LABEL: @s32_max2
-; CHECK: do.body:
-; CHECK:  store
-; CHECK:  br i1 %cmp, label %do.body.1, label %do.end
-; CHECK: do.end:
-; CHECK:  ret void
-; CHECK: do.body.1:
-; CHECK:  store
-; CHECK:  store
-; CHECK:  br label %do.end
 define void @s32_max2(i32 %n, i32* %p) {
+;
+; CHECK-LABEL: @s32_max2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[N:%.*]], 2
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[N]]
+; CHECK-NEXT:    store i32 [[N]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INC:%.*]] = add i32 [[N]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[N]], [[ADD]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY_1:%.*]], label [[DO_END:%.*]]
+; CHECK:       do.end:
+; CHECK-NEXT:    ret void
+; CHECK:       do.body.1:
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC]]
+; CHECK-NEXT:    store i32 [[INC]], i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[INC_1:%.*]] = add i32 [[INC]], 1
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC_1]]
+; CHECK-NEXT:    store i32 [[INC_1]], i32* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    br label [[DO_END]]
+;
 entry:
   %add = add i32 %n, 2
   br label %do.body
@@ -57,11 +78,22 @@ do.end:
 }
 
 ; Should not be unrolled
-; CHECK-LABEL: @s32_maxx
-; CHECK: do.body:
-; CHECK: do.end:
-; CHECK-NOT: do.body.1:
 define void @s32_maxx(i32 %n, i32 %x, i32* %p) {
+;
+; CHECK-LABEL: @s32_maxx(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X:%.*]], [[N:%.*]]
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[I_0]]
+; CHECK-NEXT:    store i32 [[I_0]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INC]] = add i32 [[I_0]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], [[ADD]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY]], label [[DO_END:%.*]]
+; CHECK:       do.end:
+; CHECK-NEXT:    ret void
+;
 entry:
   %add = add i32 %x, %n
   br label %do.body
@@ -79,11 +111,25 @@ do.end:
 }
 
 ; Should not be unrolled
-; CHECK-LABEL: @s32_max2_unpredictable_exit
-; CHECK: do.body:
-; CHECK: do.end:
-; CHECK-NOT: do.body.1:
 define void @s32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) {
+;
+; CHECK-LABEL: @s32_max2_unpredictable_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[N:%.*]], 2
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_0]], [[X:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[DO_END:%.*]], label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[I_0]]
+; CHECK-NEXT:    store i32 [[I_0]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INC]] = add i32 [[I_0]], 1
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[I_0]], [[ADD]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[DO_BODY]], label [[DO_END]]
+; CHECK:       do.end:
+; CHECK-NEXT:    ret void
+;
 entry:
   %add = add i32 %n, 2
   br label %do.body
@@ -105,16 +151,25 @@ do.end:
 }
 
 ; Unroll twice, with first loop exit kept
-; CHECK-LABEL: @u32_max1
-; CHECK: do.body:
-; CHECK:  store
-; CHECK:  br i1 %cmp, label %do.body.1, label %do.end
-; CHECK: do.end:
-; CHECK:  ret void
-; CHECK: do.body.1:
-; CHECK:  store
-; CHECK:  br label %do.end
 define void @u32_max1(i32 %n, i32* %p) {
+;
+; CHECK-LABEL: @u32_max1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[N:%.*]], 1
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[N]]
+; CHECK-NEXT:    store i32 [[N]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INC:%.*]] = add i32 [[N]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[N]], [[ADD]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY_1:%.*]], label [[DO_END:%.*]]
+; CHECK:       do.end:
+; CHECK-NEXT:    ret void
+; CHECK:       do.body.1:
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC]]
+; CHECK-NEXT:    store i32 [[INC]], i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    br label [[DO_END]]
+;
 entry:
   %add = add i32 %n, 1
   br label %do.body
@@ -132,17 +187,28 @@ do.end:
 }
 
 ; Unroll thrice, with first loop exit kept
-; CHECK-LABEL: @u32_max2
-; CHECK: do.body:
-; CHECK:  store
-; CHECK:  br i1 %cmp, label %do.body.1, label %do.end
-; CHECK: do.end:
-; CHECK:  ret void
-; CHECK: do.body.1:
-; CHECK:  store
-; CHECK:  store
-; CHECK:  br label %do.end
 define void @u32_max2(i32 %n, i32* %p) {
+;
+; CHECK-LABEL: @u32_max2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[N:%.*]], 2
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[N]]
+; CHECK-NEXT:    store i32 [[N]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INC:%.*]] = add i32 [[N]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[N]], [[ADD]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY_1:%.*]], label [[DO_END:%.*]]
+; CHECK:       do.end:
+; CHECK-NEXT:    ret void
+; CHECK:       do.body.1:
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC]]
+; CHECK-NEXT:    store i32 [[INC]], i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[INC_1:%.*]] = add i32 [[INC]], 1
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC_1]]
+; CHECK-NEXT:    store i32 [[INC_1]], i32* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    br label [[DO_END]]
+;
 entry:
   %add = add i32 %n, 2
   br label %do.body
@@ -160,11 +226,22 @@ do.end:
 }
 
 ; Should not be unrolled
-; CHECK-LABEL: @u32_maxx
-; CHECK: do.body:
-; CHECK: do.end:
-; CHECK-NOT: do.body.1:
 define void @u32_maxx(i32 %n, i32 %x, i32* %p) {
+;
+; CHECK-LABEL: @u32_maxx(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X:%.*]], [[N:%.*]]
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[I_0]]
+; CHECK-NEXT:    store i32 [[I_0]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INC]] = add i32 [[I_0]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[I_0]], [[ADD]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY]], label [[DO_END:%.*]]
+; CHECK:       do.end:
+; CHECK-NEXT:    ret void
+;
 entry:
   %add = add i32 %x, %n
   br label %do.body
@@ -182,11 +259,25 @@ do.end:
 }
 
 ; Should not be unrolled
-; CHECK-LABEL: @u32_max2_unpredictable_exit
-; CHECK: do.body:
-; CHECK: do.end:
-; CHECK-NOT: do.body.1:
 define void @u32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) {
+;
+; CHECK-LABEL: @u32_max2_unpredictable_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[N:%.*]], 2
+; CHECK-NEXT:    br label [[DO_BODY:%.*]]
+; CHECK:       do.body:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_0]], [[X:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[DO_END:%.*]], label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[I_0]]
+; CHECK-NEXT:    store i32 [[I_0]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INC]] = add i32 [[I_0]], 1
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[I_0]], [[ADD]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[DO_BODY]], label [[DO_END]]
+; CHECK:       do.end:
+; CHECK-NEXT:    ret void
+;
 entry:
   %add = add i32 %n, 2
   br label %do.body

diff  --git a/llvm/test/Transforms/LoopUnroll/partially-unroll-unconditional-latch.ll b/llvm/test/Transforms/LoopUnroll/partially-unroll-unconditional-latch.ll
index dc13642d85105..10f6a31445b72 100644
--- a/llvm/test/Transforms/LoopUnroll/partially-unroll-unconditional-latch.ll
+++ b/llvm/test/Transforms/LoopUnroll/partially-unroll-unconditional-latch.ll
@@ -1,48 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -loop-unroll -unroll-allow-partial -S %s -verify-loop-info -verify-dom-info -verify-loop-lcssa | FileCheck %s
 
 @table = internal unnamed_addr global [344 x i32] zeroinitializer, align 16
 
 define i32 @test_partial_unroll_with_breakout_at_iter0() {
-; CHECK-LABEL: define i32 @test_partial_unroll_with_breakout_at_iter0() {
-; CHECK-LABEL: entry:
-; CHECK-NEXT:    br label %for.header
-
-; CHECK-LABEL: for.header:                                       ; preds = %for.latch.3, %entry
-; CHECK-NEXT:    %red = phi i32 [ 0, %entry ], [ %red.next.3, %for.latch.3 ]
-; CHECK-NEXT:    %iv = phi i64 [ 0, %entry ], [ %iv.next.3, %for.latch.3 ]
-; CHECK-NEXT:    %red.next = add nuw nsw i32 10, %red
-; CHECK-NEXT:    %iv.next = add nuw nsw i64 %iv, 2
-; CHECK-NEXT:    %ptr = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next
-; CHECK-NEXT:    store i32 %red.next, i32* %ptr, align 4
-; CHECK-NEXT:    br label %for.latch
-
-; CHECK-LABEL: for.latch:                                        ; preds = %for.header
-; CHECK-NEXT:    %red.next.1 = add nuw nsw i32 10, %red.next
-; CHECK-NEXT:    %iv.next.1 = add nuw nsw i64 %iv.next, 2
-; CHECK-NEXT:    %ptr.1 = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next.1
-; CHECK-NEXT:    store i32 %red.next.1, i32* %ptr.1, align 4
-; CHECK-NEXT:    br label %for.latch.1
-
-; CHECK-LABEL: exit:                                             ; preds = %for.latch.2
+;
+;
+; CHECK-LABEL: @test_partial_unroll_with_breakout_at_iter0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_HEADER:%.*]]
+; CHECK:       for.header:
+; CHECK-NEXT:    [[RED:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RED_NEXT_3:%.*]], [[FOR_LATCH_3:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT_3:%.*]], [[FOR_LATCH_3]] ]
+; CHECK-NEXT:    [[RED_NEXT:%.*]] = add nuw nsw i32 10, [[RED]]
+; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 2
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 [[IV_NEXT]]
+; CHECK-NEXT:    store i32 [[RED_NEXT]], i32* [[PTR]], align 4
+; CHECK-NEXT:    br label [[FOR_LATCH:%.*]]
+; CHECK:       for.latch:
+; CHECK-NEXT:    [[RED_NEXT_1:%.*]] = add nuw nsw i32 10, [[RED_NEXT]]
+; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV_NEXT]], 2
+; CHECK-NEXT:    [[PTR_1:%.*]] = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 [[IV_NEXT_1]]
+; CHECK-NEXT:    store i32 [[RED_NEXT_1]], i32* [[PTR_1]], align 4
+; CHECK-NEXT:    br label [[FOR_LATCH_1:%.*]]
+; CHECK:       exit:
 ; CHECK-NEXT:    ret i32 0
-
-; CHECK-LABEL: for.latch.1:                                      ; preds = %for.latch
-; CHECK-NEXT:    %red.next.2 = add nuw nsw i32 10, %red.next.1
-; CHECK-NEXT:    %iv.next.2 = add nuw nsw i64 %iv.next.1, 2
-; CHECK-NEXT:    %ptr.2 = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next.2
-; CHECK-NEXT:    store i32 %red.next.2, i32* %ptr.2, align 4
-; CHECK-NEXT:    br label %for.latch.2
-
-; CHECK-LABEL: for.latch.2:                                      ; preds = %for.latch.1
-; CHECK-NEXT:    %red.next.3 = add nuw nsw i32 10, %red.next.2
-; CHECK-NEXT:    %iv.next.3 = add nuw nsw i64 %iv.next.2, 2
-; CHECK-NEXT:    %ptr.3 = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next.3
-; CHECK-NEXT:    store i32 %red.next.3, i32* %ptr.3, align 4
-; CHECK-NEXT:    %exitcond.1.i.3 = icmp eq i64 %iv.next.3, 344
-; CHECK-NEXT:    br i1 %exitcond.1.i.3, label %exit, label %for.latch.3
-
-; CHECK-LABEL: for.latch.3:                                      ; preds = %for.latch.2
-; CHECK-NEXT:    br label %for.header
+; CHECK:       for.latch.1:
+; CHECK-NEXT:    [[RED_NEXT_2:%.*]] = add nuw nsw i32 10, [[RED_NEXT_1]]
+; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV_NEXT_1]], 2
+; CHECK-NEXT:    [[PTR_2:%.*]] = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 [[IV_NEXT_2]]
+; CHECK-NEXT:    store i32 [[RED_NEXT_2]], i32* [[PTR_2]], align 4
+; CHECK-NEXT:    br label [[FOR_LATCH_2:%.*]]
+; CHECK:       for.latch.2:
+; CHECK-NEXT:    [[RED_NEXT_3]] = add nuw nsw i32 10, [[RED_NEXT_2]]
+; CHECK-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV_NEXT_2]], 2
+; CHECK-NEXT:    [[PTR_3:%.*]] = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 [[IV_NEXT_3]]
+; CHECK-NEXT:    store i32 [[RED_NEXT_3]], i32* [[PTR_3]], align 4
+; CHECK-NEXT:    [[EXITCOND_1_I_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 344
+; CHECK-NEXT:    br i1 [[EXITCOND_1_I_3]], label [[EXIT:%.*]], label [[FOR_LATCH_3]]
+; CHECK:       for.latch.3:
+; CHECK-NEXT:    br label [[FOR_HEADER]]
 ;
 entry:
   br label %for.header

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
index 413ae0dc104f4..707e5278fdd06 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -unroll-runtime-other-exit-predictable=false -loop-unroll -unroll-runtime=true -verify-dom-info -verify-loop-info -instcombine -S | FileCheck %s
 ; RUN: opt < %s -unroll-runtime-other-exit-predictable=false -loop-unroll -unroll-runtime=true -verify-dom-info -unroll-runtime-multi-exit=false -verify-loop-info -S | FileCheck %s -check-prefix=NOUNROLL
 
@@ -5,27 +6,161 @@
 
 ; the second exit block is a deopt block. The loop has one exiting block other than the latch.
 define i32 @test1(i32* nocapture %a, i64 %n) {
-; CHECK-LABEL: test1(
-; CHECK-LABEL:  header.epil:
-; CHECK-NEXT:     %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
-; CHECK-LABEL:  otherexit.loopexit:
-; CHECK-NEXT:     %sum.02.lcssa.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %for.exiting_block.2 ], [ %add.2, %for.exiting_block.3 ], [ %add.3, %for.exiting_block.4 ], [ %add.4, %for.exiting_block.5 ], [ %add.5, %for.exiting_block.6 ],
-; CHECK-NEXT:     br label %otherexit
-; CHECK-LABEL:  otherexit.loopexit3:
-; CHECK-NEXT:     br label %otherexit
-; CHECK-LABEL:  otherexit:
-; CHECK-NEXT:     %sum.02.lcssa = phi i32 [ %sum.02.lcssa.ph, %otherexit.loopexit ], [ %sum.02.epil, %otherexit.loopexit3 ]
-; CHECK-NEXT:     %rval = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %sum.02.lcssa) ]
-; CHECK-NEXT:     ret i32 %rval
-; CHECK-LABEL:  latch.7:
-; CHECK:          add i64 %indvars.iv, 8
-
-; NOUNROLL: test1(
-; NOUNROLL-NOT: .epil
-; NOUNROLL-NOT: .prol
-; NOUNROLL:   otherexit:
-; NOUNROLL-NEXT:   %sum.02.lcssa = phi i32 [ %sum.02, %for.exiting_block ]
-; NOUNROLL-NEXT:   %rval = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %sum.02.lcssa) ] 
+;
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 7
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
+; CHECK-NEXT:    br i1 [[TMP1]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK:       entry.new:
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[N]], -8
+; CHECK-NEXT:    br label [[HEADER:%.*]]
+; CHECK:       header:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ]
+; CHECK-NEXT:    [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[ENTRY_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[LATCH_7]] ]
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK:%.*]]
+; CHECK:       for.exiting_block:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT:    br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[SUM_02]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = or i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_1:%.*]]
+; CHECK:       latchexit.unr-lcssa.loopexit:
+; CHECK-NEXT:    br label [[LATCHEXIT_UNR_LCSSA]]
+; CHECK:       latchexit.unr-lcssa:
+; CHECK-NEXT:    [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[LATCHEXIT:%.*]], label [[HEADER_EPIL_PREHEADER:%.*]]
+; CHECK:       header.epil.preheader:
+; CHECK-NEXT:    br label [[HEADER_EPIL:%.*]]
+; CHECK:       header.epil:
+; CHECK-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[LATCH_EPIL]] ], [ [[XTRAITER]], [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
+; CHECK:       for.exiting_block.epil:
+; CHECK-NEXT:    [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT:    br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]]
+; CHECK:       latch.epil:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_EPIL]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT:    [[ADD_EPIL]] = add nsw i32 [[TMP3]], [[SUM_02_EPIL]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
+; CHECK-NEXT:    [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1
+; CHECK-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], label [[HEADER_EPIL]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       latchexit.epilog-lcssa:
+; CHECK-NEXT:    br label [[LATCHEXIT]]
+; CHECK:       latchexit:
+; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL]], [[LATCHEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
+; CHECK:       otherexit.loopexit:
+; CHECK-NEXT:    [[SUM_02_LCSSA_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1:%.*]], [[FOR_EXITING_BLOCK_2:%.*]] ], [ [[ADD_2:%.*]], [[FOR_EXITING_BLOCK_3:%.*]] ], [ [[ADD_3:%.*]], [[FOR_EXITING_BLOCK_4:%.*]] ], [ [[ADD_4:%.*]], [[FOR_EXITING_BLOCK_5:%.*]] ], [ [[ADD_5:%.*]], [[FOR_EXITING_BLOCK_6:%.*]] ], [ [[ADD_6:%.*]], [[FOR_EXITING_BLOCK_7:%.*]] ]
+; CHECK-NEXT:    br label [[OTHEREXIT:%.*]]
+; CHECK:       otherexit.loopexit3:
+; CHECK-NEXT:    br label [[OTHEREXIT]]
+; CHECK:       otherexit:
+; CHECK-NEXT:    [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02_LCSSA_PH]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_EPIL]], [[OTHEREXIT_LOOPEXIT3]] ]
+; CHECK-NEXT:    [[RVAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
+; CHECK-NEXT:    ret i32 [[RVAL]]
+; CHECK:       for.exiting_block.1:
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT:    br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
+; CHECK:       latch.1:
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[ADD_1]] = add nsw i32 [[TMP4]], [[ADD]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = or i64 [[INDVARS_IV]], 2
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_2]]
+; CHECK:       for.exiting_block.2:
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT:    br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
+; CHECK:       latch.2:
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[ADD_2]] = add nsw i32 [[TMP5]], [[ADD_1]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = or i64 [[INDVARS_IV]], 3
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_3]]
+; CHECK:       for.exiting_block.3:
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT:    br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
+; CHECK:       latch.3:
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[ADD_3]] = add nsw i32 [[TMP6]], [[ADD_2]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = or i64 [[INDVARS_IV]], 4
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_4]]
+; CHECK:       for.exiting_block.4:
+; CHECK-NEXT:    [[CMP_4:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT:    br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
+; CHECK:       latch.4:
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
+; CHECK-NEXT:    [[ADD_4]] = add nsw i32 [[TMP7]], [[ADD_3]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = or i64 [[INDVARS_IV]], 5
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_5]]
+; CHECK:       for.exiting_block.5:
+; CHECK-NEXT:    [[CMP_5:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT:    br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
+; CHECK:       latch.5:
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
+; CHECK-NEXT:    [[ADD_5]] = add nsw i32 [[TMP8]], [[ADD_4]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = or i64 [[INDVARS_IV]], 6
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_6]]
+; CHECK:       for.exiting_block.6:
+; CHECK-NEXT:    [[CMP_6:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT:    br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
+; CHECK:       latch.6:
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
+; CHECK-NEXT:    [[ADD_6]] = add nsw i32 [[TMP9]], [[ADD_5]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = or i64 [[INDVARS_IV]], 7
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_7]]
+; CHECK:       for.exiting_block.7:
+; CHECK-NEXT:    [[CMP_7:%.*]] = icmp eq i64 [[N]], 42
+; CHECK-NEXT:    br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
+; CHECK:       latch.7:
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_6]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4
+; CHECK-NEXT:    [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
+; CHECK-NEXT:    [[NITER_NSUB_7]] = add i64 [[NITER]], -8
+; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0
+; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]], label [[HEADER]]
+;
+; NOUNROLL-LABEL: @test1(
+; NOUNROLL-NEXT:  entry:
+; NOUNROLL-NEXT:    br label [[HEADER:%.*]]
+; NOUNROLL:       header:
+; NOUNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; NOUNROLL-NEXT:    [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
+; NOUNROLL-NEXT:    br label [[FOR_EXITING_BLOCK:%.*]]
+; NOUNROLL:       for.exiting_block:
+; NOUNROLL-NEXT:    [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
+; NOUNROLL-NEXT:    br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]]
+; NOUNROLL:       latch:
+; NOUNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; NOUNROLL-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; NOUNROLL-NEXT:    [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
+; NOUNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; NOUNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; NOUNROLL-NEXT:    br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
+; NOUNROLL:       latchexit:
+; NOUNROLL-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
+; NOUNROLL-NEXT:    ret i32 [[SUM_0_LCSSA]]
+; NOUNROLL:       otherexit:
+; NOUNROLL-NEXT:    [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ]
+; NOUNROLL-NEXT:    [[RVAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_02_LCSSA]]) ]
+; NOUNROLL-NEXT:    ret i32 [[RVAL]]
+;
 entry:
   br label %header
 
@@ -35,8 +170,8 @@ header:
   br label %for.exiting_block
 
 for.exiting_block:
- %cmp = icmp eq i64 %n, 42
- br i1 %cmp, label %otherexit, label %latch
+  %cmp = icmp eq i64 %n, 42
+  br i1 %cmp, label %otherexit, label %latch
 
 latch:
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
@@ -57,12 +192,53 @@ otherexit:
 
 ; the exit block is not a deopt block.
 define i32 @test2(i32* nocapture %a, i64 %n) {
-; CHECK-LABEL: test2(
-; CHECK-NOT: .epil
-; CHECK-NOT: .prol
-; CHECK-LABEL: otherexit:
-; CHECK-NEXT:    ret i32 %sum.02
-
+;
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[HEADER:%.*]]
+; CHECK:       header:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK:%.*]]
+; CHECK:       for.exiting_block:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
+; CHECK-NEXT:    br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
+; CHECK:       latchexit:
+; CHECK-NEXT:    ret i32 [[ADD]]
+; CHECK:       otherexit:
+; CHECK-NEXT:    ret i32 [[SUM_02]]
+;
+; NOUNROLL-LABEL: @test2(
+; NOUNROLL-NEXT:  entry:
+; NOUNROLL-NEXT:    br label [[HEADER:%.*]]
+; NOUNROLL:       header:
+; NOUNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; NOUNROLL-NEXT:    [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
+; NOUNROLL-NEXT:    br label [[FOR_EXITING_BLOCK:%.*]]
+; NOUNROLL:       for.exiting_block:
+; NOUNROLL-NEXT:    [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42
+; NOUNROLL-NEXT:    br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]]
+; NOUNROLL:       latch:
+; NOUNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; NOUNROLL-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; NOUNROLL-NEXT:    [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]]
+; NOUNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; NOUNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; NOUNROLL-NEXT:    br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]]
+; NOUNROLL:       latchexit:
+; NOUNROLL-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[LATCH]] ]
+; NOUNROLL-NEXT:    ret i32 [[SUM_0_LCSSA]]
+; NOUNROLL:       otherexit:
+; NOUNROLL-NEXT:    [[RVAL:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ]
+; NOUNROLL-NEXT:    ret i32 [[RVAL]]
+;
 entry:
   br label %header
 
@@ -72,8 +248,8 @@ header:
   br label %for.exiting_block
 
 for.exiting_block:
- %cmp = icmp eq i64 %n, 42
- br i1 %cmp, label %otherexit, label %latch
+  %cmp = icmp eq i64 %n, 42
+  br i1 %cmp, label %otherexit, label %latch
 
 latch:
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
index 6ea1b5409c74b..da3b4ecba0ee6 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
@@ -1,7 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=4 -unroll-remainder -instcombine | FileCheck %s
 
-; CHECK-LABEL: unroll
 define i32 @unroll(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
+;
+; CHECK-LABEL: @unroll(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP9:%.*]] = icmp eq i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP9]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]]
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 3
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]]
+; CHECK:       for.body.lr.ph.new:
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit.unr-lcssa.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]]
+; CHECK:       for.cond.cleanup.loopexit.unr-lcssa:
+; CHECK-NEXT:    [[ADD_LCSSA_PH:%.*]] = phi i32 [ undef, [[FOR_BODY_LR_PH]] ], [ [[ADD_3:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[C_010_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[ADD_3]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY_EPIL_PREHEADER:%.*]]
+; CHECK:       for.body.epil.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY_EPIL:%.*]]
+; CHECK:       for.body.epil:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV_UNR]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_EPIL:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV_UNR]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL]], align 4
+; CHECK-NEXT:    [[MUL_EPIL:%.*]] = mul nsw i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[ADD_EPIL:%.*]] = add nsw i32 [[MUL_EPIL]], [[C_010_UNR]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 1
+; CHECK-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 1
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA:%.*]], label [[FOR_BODY_EPIL_1:%.*]]
+; CHECK:       for.cond.cleanup.loopexit.epilog-lcssa:
+; CHECK-NEXT:    [[ADD_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ], [ [[ADD_EPIL_1:%.*]], [[FOR_BODY_EPIL_1]] ], [ [[ADD_EPIL_2:%.*]], [[FOR_BODY_EPIL_2:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_LCSSA_PH1]], [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[C_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[C_0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[INDVARS_IV_NEXT_3]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[C_010:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[MUL]], [[C_010]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = or i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[MUL_1]], [[ADD]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = or i64 [[INDVARS_IV]], 2
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[TMP9]], [[TMP8]]
+; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[MUL_2]], [[ADD_1]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = or i64 [[INDVARS_IV]], 3
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT:    [[MUL_3:%.*]] = mul nsw i32 [[TMP11]], [[TMP10]]
+; CHECK-NEXT:    [[ADD_3]] = add nsw i32 [[MUL_3]], [[ADD_2]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
+; CHECK-NEXT:    [[NITER_NSUB_3]] = add i64 [[NITER]], -4
+; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NSUB_3]], 0
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       for.body.epil.1:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_EPIL]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_EPIL]]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_1]], align 4
+; CHECK-NEXT:    [[MUL_EPIL_1:%.*]] = mul nsw i32 [[TMP13]], [[TMP12]]
+; CHECK-NEXT:    [[ADD_EPIL_1]] = add nsw i32 [[MUL_EPIL_1]], [[ADD_EPIL]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 2
+; CHECK-NEXT:    [[EPIL_ITER_CMP_1_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 2
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]], label [[FOR_BODY_EPIL_2]]
+; CHECK:       for.body.epil.2:
+; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_2]], align 4
+; CHECK-NEXT:    [[MUL_EPIL_2:%.*]] = mul nsw i32 [[TMP15]], [[TMP14]]
+; CHECK-NEXT:    [[ADD_EPIL_2]] = add nsw i32 [[MUL_EPIL_2]], [[ADD_EPIL_1]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]]
+;
 entry:
   %cmp9 = icmp eq i32 %N, 0
   br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph
@@ -14,50 +111,15 @@ for.cond.cleanup:
   %c.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
   ret i32 %c.0.lcssa
 
-; CHECK-LABEL: for.body.lr.ph
-; CHECK: [[COUNT:%[a-z.0-9]+]] = add nsw i64 %wide.trip.count, -1
-; CHECK: %xtraiter = and i64 %wide.trip.count, 3
-; CHECK: [[CMP:%[a-z.0-9]+]] = icmp ult i64 [[COUNT]], 3
-; CHECK: br i1 [[CMP]], label %[[CLEANUP:.*]], label %for.body.lr.ph.new
 
-; CHECK-LABEL: for.body.lr.ph.new:
-; CHECK: %unroll_iter = and i64 %wide.trip.count, 4294967292
-; CHECK: br label %for.body
 
-; CHECK: [[CLEANUP]]:
-; CHECK: [[MOD:%[a-z.0-9]+]] = icmp eq i64 %xtraiter, 0
-; CHECK: br i1 [[MOD]], label %[[EXIT:.*]], label %[[EPIL_PEEL0_PRE:.*]]
 
-; CHECK: [[EPIL_PEEL0_PRE]]:
-; CHECK: br label %[[EPIL_PEEL0:.*]]
 
-; CHECK: [[EPIL_PEEL0]]:
-; CHECK: [[PEEL_CMP0:%[a-z.0-9]+]] = icmp eq i64 %xtraiter, 1
-; CHECK: br i1 [[PEEL_CMP0]], label %[[EPIL_EXIT:.*]], label %[[EPIL_PEEL1:.*]]
 
-; CHECK: [[EPIL_EXIT]]:
-; CHECK: br label %[[EXIT]]
 
-; CHECK: [[EXIT]]:
-; CHECK: ret i32
 
-; CHECK-LABEL: for.body:
-; CHECK: [[INDVAR0:%[a-z.0-9]+]] = phi i64 [ 0, %for.body.lr.ph
-; CHECK: [[ITER:%[a-z.0-9]+]] = phi i64 [ %unroll_iter
-; CHECK: or i64 [[INDVAR0]], 1
-; CHECK: or i64 [[INDVAR0]], 2
-; CHECK: or i64 [[INDVAR0]], 3
-; CHECK: add nuw nsw i64 [[INDVAR0]], 4
-; CHECK: [[SUB:%[a-z.0-9]+]] = add i64 [[ITER]], -4
-; CHECK: [[ITER_CMP:%[a-z.0-9]+]] = icmp eq i64 [[SUB]], 0
-; CHECK: br i1 [[ITER_CMP]], label %[[LOOP_EXIT:.*]], label %for.body
 
-; CHECK: [[EPIL_PEEL1]]:
-; CHECK: [[PEEL_CMP1:%[a-z.0-9]+]] = icmp eq i64 %xtraiter, 2
-; CHECK: br i1 [[PEEL_CMP1]], label %[[EPIL_EXIT]], label %[[EPIL_PEEL2:.*]]
 
-; CHECK: [[EPIL_PEEL2]]:
-; CHECK: br label %[[EXIT]]
 
 for.body:
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]


        


More information about the llvm-commits mailing list