[llvm] r308421 - [LV] Test once if vector trip count is zero, instead of twice

Tue Jul 18 22:16:39 PDT 2017

Author: ayalz
Date: Tue Jul 18 22:16:39 2017
New Revision: 308421

URL: http://llvm.org/viewvc/llvm-project?rev=308421&view=rev
Log:
[LV] Test once if vector trip count is zero, instead of twice

Generate a single test to decide if there are enough iterations to jump to the
vectorized loop, or else go to the scalar remainder loop. This test compares the
Scalar Trip Count (STC) against VF * UF: if STC < VF * UF, go to the scalar
loop. If requiresScalarEpilogue() holds, at least one iteration must remain
scalar; the rest can be used to form vector iterations. So in this case the test
checks instead if (STC - 1) < VF * UF by comparing STC <= VF * UF, and goes to
the scalar loop if so. Otherwise the vector loop is entered for at least one
vector iteration.

This test also covers the case where incrementing the backedge-taken count
overflows, leading to an incorrect trip count of zero. In this (rare) case we
also avoid the vector loop and jump to the scalar loop.

This patch simplifies the existing tests and effectively removes the basic-block
originally named "min.iters.checked", leaving the single test in block
"vector.ph".

Original observation and initial patch by Evgeny Stupachenko.

Differential Revision: https://reviews.llvm.org/D34150

Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/trunk/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
    llvm/trunk/test/Transforms/LoopVectorize/debugloc.ll
    llvm/trunk/test/Transforms/LoopVectorize/first-order-recurrence.ll
    llvm/trunk/test/Transforms/LoopVectorize/float-induction.ll
    llvm/trunk/test/Transforms/LoopVectorize/if-conversion-nest.ll
    llvm/trunk/test/Transforms/LoopVectorize/induction-step.ll
    llvm/trunk/test/Transforms/LoopVectorize/induction.ll
    llvm/trunk/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
    llvm/trunk/test/Transforms/LoopVectorize/interleaved-accesses.ll
    llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll
    llvm/trunk/test/Transforms/LoopVectorize/miniters.ll
    llvm/trunk/test/Transforms/LoopVectorize/runtime-check-readonly.ll
    llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=308421&r1=308420&r2=308421&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Tue Jul 18 22:16:39 2017
@@ -574,11 +574,9 @@ protected:
   /// Returns (and creates if needed) the trip count of the widened loop.
   Value *getOrCreateVectorTripCount(Loop *NewLoop);
 
-  /// Emit a bypass check to see if the trip count would overflow, or we
-  /// wouldn't have enough iterations to execute one vector loop.
+  /// Emit a bypass check to see if the vector trip count is zero, including if
+  /// it overflows.
   void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass);
-  /// Emit a bypass check to see if the vector trip count is nonzero.
-  void emitVectorLoopEnteredCheck(Loop *L, BasicBlock *Bypass);
   /// Emit a bypass check to see if all of the SCEV assumptions we've
   /// had to make are correct.
   void emitSCEVChecks(Loop *L, BasicBlock *Bypass);
@@ -3289,37 +3287,16 @@ void InnerLoopVectorizer::emitMinimumIte
   BasicBlock *BB = L->getLoopPreheader();
   IRBuilder<> Builder(BB->getTerminator());
 
-  // Generate code to check that the loop's trip count that we computed by
-  // adding one to the backedge-taken count will not overflow.
-  Value *CheckMinIters = Builder.CreateICmpULT(
-      Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check");
+  // Generate code to check if the loop's trip count is less than VF * UF, or
+  // equal to it in case a scalar epilogue is required; this implies that the
+  // vector trip count is zero. This check also covers the case where adding one
+  // to the backedge-taken count overflowed leading to an incorrect trip count
+  // of zero. In this case we will also jump to the scalar loop.
+  auto P = Legal->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE
+                                           : ICmpInst::ICMP_ULT;
+  Value *CheckMinIters = Builder.CreateICmp(
+      P, Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check");
 
-  BasicBlock *NewBB =
-      BB->splitBasicBlock(BB->getTerminator(), "min.iters.checked");
-  // Update dominator tree immediately if the generated block is a
-  // LoopBypassBlock because SCEV expansions to generate loop bypass
-  // checks may query it before the current function is finished.
-  DT->addNewBlock(NewBB, BB);
-  if (L->getParentLoop())
-    L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
-  ReplaceInstWithInst(BB->getTerminator(),
-                      BranchInst::Create(Bypass, NewBB, CheckMinIters));
-  LoopBypassBlocks.push_back(BB);
-}
-
-void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L,
-                                                     BasicBlock *Bypass) {
-  Value *TC = getOrCreateVectorTripCount(L);
-  BasicBlock *BB = L->getLoopPreheader();
-  IRBuilder<> Builder(BB->getTerminator());
-
-  // Now, compare the new count to zero. If it is zero skip the vector loop and
-  // jump to the scalar loop.
-  Value *Cmp = Builder.CreateICmpEQ(TC, Constant::getNullValue(TC->getType()),
-                                    "cmp.zero");
-
-  // Generate code to check that the loop's trip count that we computed by
-  // adding one to the backedge-taken count will not overflow.
   BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph");
   // Update dominator tree immediately if the generated block is a
   // LoopBypassBlock because SCEV expansions to generate loop bypass
@@ -3328,7 +3305,7 @@ void InnerLoopVectorizer::emitVectorLoop
   if (L->getParentLoop())
     L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI);
   ReplaceInstWithInst(BB->getTerminator(),
-                      BranchInst::Create(Bypass, NewBB, Cmp));
+                      BranchInst::Create(Bypass, NewBB, CheckMinIters));
   LoopBypassBlocks.push_back(BB);
 }
 
@@ -3477,14 +3454,13 @@ void InnerLoopVectorizer::createVectoriz
 
   Value *StartIdx = ConstantInt::get(IdxTy, 0);
 
-  // We need to test whether the backedge-taken count is uint##_max. Adding one
-  // to it will cause overflow and an incorrect loop trip count in the vector
-  // body. In case of overflow we want to directly jump to the scalar remainder
-  // loop.
-  emitMinimumIterationCountCheck(Lp, ScalarPH);
   // Now, compare the new count to zero. If it is zero skip the vector loop and
-  // jump to the scalar loop.
-  emitVectorLoopEnteredCheck(Lp, ScalarPH);
+  // jump to the scalar loop. This check also covers the case where the
+  // backedge-taken count is uint##_max: adding one to it will overflow leading
+  // to an incorrect trip count of zero. In this (rare) case we will also jump
+  // to the scalar loop.
+  emitMinimumIterationCountCheck(Lp, ScalarPH);
+
   // Generate the code to check any assumptions that we've made for SCEV
   // expressions.
   emitSCEVChecks(Lp, ScalarPH);
@@ -3527,7 +3503,7 @@ void InnerLoopVectorizer::createVectoriz
       // We know what the end value is.
       EndValue = CountRoundDown;
     } else {
-      IRBuilder<> B(LoopBypassBlocks.back()->getTerminator());
+      IRBuilder<> B(Lp->getLoopPreheader()->getTerminator());
       Type *StepType = II.getStep()->getType();
       Instruction::CastOps CastOp =
         CastInst::getCastOpcode(CountRoundDown, true, StepType, true);
@@ -4168,7 +4144,7 @@ void InnerLoopVectorizer::fixReduction(P
   // To do so, we need to generate the 'identity' vector and override
   // one of the elements with the incoming scalar reduction. We need
   // to do it in the vector-loop preheader.
-  Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator());
+  Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
 
   // This is the vector-clone of the value that leaves the loop.
   Type *VecTy = getOrCreateVectorValue(LoopExitInst, 0)->getType();

Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/float-induction-x86.ll?rev=308421&r1=308420&r2=308421&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/float-induction-x86.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/float-induction-x86.ll Tue Jul 18 22:16:39 2017
@@ -86,10 +86,10 @@ for.end:
 ; AUTO_VEC-NEXT:  entry:
 ; AUTO_VEC-NEXT:    [[TMP0:%.*]] = icmp sgt i64 %n, 1
 ; AUTO_VEC-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i64 %n, i64 1
-; AUTO_VEC:         br i1 {{.*}}, label %for.body, label %min.iters.checked
-; AUTO_VEC:       min.iters.checked:
+; AUTO_VEC:         br i1 {{.*}}, label %for.body, label %vector.ph
+; AUTO_VEC:       vector.ph:
 ; AUTO_VEC-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792
-; AUTO_VEC:         br i1 {{.*}}, label %for.body, label %vector.body
+; AUTO_VEC:         br label %vector.body
 ; AUTO_VEC:       middle.block:
 ; AUTO_VEC:         [[TMP11:%.*]] = add nsw i64 [[N_VEC]], -1
 ; AUTO_VEC-NEXT:    [[CAST_CMO:%.*]] = sitofp i64 [[TMP11]] to double

Modified: llvm/trunk/test/Transforms/LoopVectorize/debugloc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/debugloc.ll?rev=308421&r1=308420&r2=308421&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/debugloc.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/debugloc.ll Tue Jul 18 22:16:39 2017
@@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8
 ; Make sure we are preserving debug info in the vectorized code.
 
 ; CHECK: for.body.lr.ph
-; CHECK:   cmp.zero = icmp eq i64 {{.*}}, 0, !dbg !{{[0-9]+}}
+; CHECK:   min.iters.check = icmp ult i64 {{.*}}, 2, !dbg !{{[0-9]+}}
 ; CHECK: vector.body
 ; CHECK:   index {{.*}}, !dbg ![[LOC:[0-9]+]]
 ; CHECK:   getelementptr inbounds i32, i32* %a, {{.*}}, !dbg ![[LOC]]

Modified: llvm/trunk/test/Transforms/LoopVectorize/first-order-recurrence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/first-order-recurrence.ll?rev=308421&r1=308420&r2=308421&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/first-order-recurrence.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/first-order-recurrence.ll Tue Jul 18 22:16:39 2017
@@ -22,7 +22,7 @@ target datalayout = "e-m:e-i64:64-i128:1
 ; CHECK:       middle.block:
 ; CHECK:         %vector.recur.extract = extractelement <4 x i32> [[L1]], i32 3
 ; CHECK:       scalar.ph:
-; CHECK:         %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %pre_load, %vector.memcheck ], [ %pre_load, %min.iters.checked ], [ %pre_load, %for.preheader ]
+; CHECK:         %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %pre_load, %vector.memcheck ], [ %pre_load, %for.preheader ]
 ; CHECK:       scalar.body:
 ; CHECK:         %scalar.recur = phi i32 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
 ;
@@ -79,7 +79,7 @@ for.exit:
 ; CHECK:       middle.block:
 ; CHECK:         %vector.recur.extract = extractelement <4 x i32> [[L1]], i32 3
 ; CHECK:       scalar.ph:
-; CHECK:         %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %.pre, %min.iters.checked ], [ %.pre, %for.preheader ]
+; CHECK:         %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %.pre, %for.preheader ]
 ; CHECK:       scalar.body:
 ; CHECK:         %scalar.recur = phi i32 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
 ;
@@ -144,7 +144,7 @@ scalar.body:
 ; CHECK:       middle.block:
 ; CHECK:         %vector.recur.extract = extractelement <4 x i16> [[L1]], i32 3
 ; CHECK:       scalar.ph:
-; CHECK:         %scalar.recur.init = phi i16 [ %vector.recur.extract, %middle.block ], [ %0, %vector.memcheck ], [ %0, %min.iters.checked ], [ %0, %for.preheader ]
+; CHECK:         %scalar.recur.init = phi i16 [ %vector.recur.extract, %middle.block ], [ %0, %vector.memcheck ], [ %0, %for.preheader ]
 ; CHECK:       scalar.body:
 ; CHECK:         %scalar.recur = phi i16 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
 ;
@@ -288,7 +288,7 @@ for.cond.cleanup3:
 
 ; UNROLL-NO-IC-LABEL: @PR30183(
 ; UNROLL-NO-IC:       vector.ph:
-; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> undef, i32 [[PRE_LOAD:%.*]], i32 3
+; UNROLL-NO-IC:         [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> undef, i32 [[PRE_LOAD:%.*]], i32 3
 ; UNROLL-NO-IC-NEXT:    br label %vector.body
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]

Modified: llvm/trunk/test/Transforms/LoopVectorize/float-induction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/float-induction.ll?rev=308421&r1=308420&r2=308421&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/float-induction.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/float-induction.ll Tue Jul 18 22:16:39 2017
@@ -15,7 +15,7 @@
 
 ; VEC4_INTERL1-LABEL: @fp_iv_loop1(
 ; VEC4_INTERL1:       vector.ph:
-; VEC4_INTERL1-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
+; VEC4_INTERL1:         [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
 ; VEC4_INTERL1-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
 ; VEC4_INTERL1-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> undef, float %fpinc, i32 0
 ; VEC4_INTERL1-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> undef, <4 x i32> zeroinitializer
@@ -37,7 +37,7 @@
 
 ; VEC4_INTERL2-LABEL: @fp_iv_loop1(
 ; VEC4_INTERL2:       vector.ph:
-; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
+; VEC4_INTERL2:         [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
 ; VEC4_INTERL2-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
 ; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT3:%.*]] = insertelement <4 x float> undef, float %fpinc, i32 0
 ; VEC4_INTERL2-NEXT:    [[DOTSPLAT4:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT3]], <4 x float> undef, <4 x i32> zeroinitializer
@@ -63,7 +63,7 @@
 
 ; VEC1_INTERL2-LABEL: @fp_iv_loop1(
 ; VEC1_INTERL2:       vector.ph:
-; VEC1_INTERL2-NEXT:    br label %vector.body
+; VEC1_INTERL2:         br label %vector.body
 ; VEC1_INTERL2:       vector.body:
 ; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
 ; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
@@ -115,7 +115,7 @@ for.end:
 
 ; VEC4_INTERL1-LABEL: @fp_iv_loop2(
 ; VEC4_INTERL1:       vector.ph:
-; VEC4_INTERL1-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
+; VEC4_INTERL1:         [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
 ; VEC4_INTERL1-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
 ; VEC4_INTERL1-NEXT:    [[INDUCTION2:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
 ; VEC4_INTERL1-NEXT:    br label %vector.body
@@ -172,7 +172,7 @@ for.end:
 ; VEC4_INTERL1:       for.body.lr.ph:
 ; VEC4_INTERL1:         [[TMP0:%.*]] = load float, float* @fp_inc, align 4
 ; VEC4_INTERL1:       vector.ph:
-; VEC4_INTERL1-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
+; VEC4_INTERL1:         [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
 ; VEC4_INTERL1-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
 ; VEC4_INTERL1-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0
 ; VEC4_INTERL1-NEXT:    [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> undef, <4 x i32> zeroinitializer
@@ -250,7 +250,7 @@ for.end:
 
 ; VEC4_INTERL1-LABEL: @fp_iv_loop4(
 ; VEC4_INTERL1:       vector.ph:
-; VEC4_INTERL1-NEXT:    br label %vector.body
+; VEC4_INTERL1:         br label %vector.body
 ; VEC4_INTERL1:       vector.body:
 ; VEC4_INTERL1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
 ; VEC4_INTERL1-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
@@ -289,7 +289,7 @@ for.end:
 
 ; VEC2_INTERL1_PRED_STORE-LABEL: @non_primary_iv_float_scalar(
 ; VEC2_INTERL1_PRED_STORE:       vector.body:
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ], [ 0, %min.iters.checked ]
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP1:%.*]] = sitofp i64 [[INDEX]] to float
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* %A, i64 [[INDEX]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*

Modified: llvm/trunk/test/Transforms/LoopVectorize/if-conversion-nest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/if-conversion-nest.ll?rev=308421&r1=308420&r2=308421&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/if-conversion-nest.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/if-conversion-nest.ll Tue Jul 18 22:16:39 2017
@@ -13,24 +13,21 @@ define i32 @foo(i32* nocapture %A, i32*
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[MIN_ITERS_CHECKED:%.*]]
-; CHECK:       min.iters.checked:
-; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[N]], 3
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = zext i32 [[TMP3]] to i64
-; CHECK-NEXT:    [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[CMP_ZERO:%.*]] = icmp eq i64 [[N_VEC]], 0
-; CHECK-NEXT:    br i1 [[CMP_ZERO]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK:       vector.memcheck:
-; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[N]], -1
-; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
-; CHECK-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP6]]
-; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP5]]
+; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP5]]
 ; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[A]]
 ; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]]
 ; CHECK-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[N]], 3
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[N_MOD_VF]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -55,10 +52,10 @@ define i32 @foo(i32* nocapture %A, i32*
 ; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP6]], 0
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[MIN_ITERS_CHECKED]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END14:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]

Modified: llvm/trunk/test/Transforms/LoopVectorize/induction-step.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/induction-step.ll?rev=308421&r1=308420&r2=308421&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/induction-step.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/induction-step.ll Tue Jul 18 22:16:39 2017
@@ -15,7 +15,7 @@
 ; CHECK:       for.body.lr.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @int_inc, align 4
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %init, i32 0
+; CHECK:         [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %init, i32 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> undef, i32 [[TMP0]], i32 0
 ; CHECK-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> undef, <8 x i32> zeroinitializer
@@ -86,7 +86,7 @@ for.end:
 
 ; CHECK-LABEL: @induction_with_loop_inv(
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %x.011, i32 0
+; CHECK:         [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %x.011, i32 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> undef, i32 %j.012, i32 0
 ; CHECK-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> undef, <8 x i32> zeroinitializer

Modified: llvm/trunk/test/Transforms/LoopVectorize/induction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/induction.ll?rev=308421&r1=308420&r2=308421&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/induction.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/induction.ll Tue Jul 18 22:16:39 2017
@@ -501,13 +501,13 @@ define i32 @i16_loop() nounwind readnone
 ; condition and branch directly to the scalar loop.
 
 ; CHECK-LABEL: max_i32_backedgetaken
-; CHECK:  br i1 true, label %scalar.ph, label %min.iters.checked
+; CHECK:  br i1 true, label %scalar.ph, label %vector.ph
 
 ; CHECK: middle.block:
 ; CHECK:  %[[v9:.+]] = extractelement <2 x i32> %bin.rdx, i32 0
 ; CHECK: scalar.ph:
 ; CHECK:  %bc.resume.val = phi i32 [ 0, %middle.block ], [ 0, %[[v0:.+]] ]
-; CHECK:  %bc.merge.rdx = phi i32 [ 1, %[[v0:.+]] ], [ 1, %min.iters.checked ], [ %[[v9]], %middle.block ]
+; CHECK:  %bc.merge.rdx = phi i32 [ 1, %[[v0:.+]] ], [ %[[v9]], %middle.block ]
 
 define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
 

Modified: llvm/trunk/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll?rev=308421&r1=308420&r2=308421&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll Tue Jul 18 22:16:39 2017
@@ -9,7 +9,7 @@ target datalayout = "e-m:e-i64:64-i128:1
 ;
 ; CHECK-LABEL: @interleaved_with_cond_store_0(
 ;
-; CHECK: min.iters.checked
+; CHECK: vector.ph
 ; CHECK:   %n.mod.vf = and i64 %[[N:.+]], 1
 ; CHECK:   %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
 ; CHECK:   %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
@@ -58,7 +58,7 @@ for.end:
 ;
 ; CHECK-LABEL: @interleaved_with_cond_store_1(
 ;
-; CHECK: min.iters.checked
+; CHECK: vector.ph
 ; CHECK:   %n.mod.vf = and i64 %[[N:.+]], 1
 ; CHECK:   %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
 ; CHECK:   %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
@@ -117,7 +117,7 @@ for.end:
 ;
 ; CHECK-LABEL: @interleaved_with_cond_store_2(
 ;
-; CHECK: min.iters.checked
+; CHECK: vector.ph
 ; CHECK:   %n.mod.vf = and i64 %[[N:.+]], 1
 ; CHECK:   %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
 ; CHECK:   %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf

Modified: llvm/trunk/test/Transforms/LoopVectorize/interleaved-accesses.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/interleaved-accesses.ll?rev=308421&r1=308420&r2=308421&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/interleaved-accesses.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/interleaved-accesses.ll Tue Jul 18 22:16:39 2017
@@ -338,7 +338,7 @@ for.body:
 ; }
 
 ; CHECK-LABEL: @even_load_dynamic_tc(
-; CHECK: min.iters.checked:
+; CHECK: vector.ph:
 ; CHECK:   %n.mod.vf = and i64 %[[N:[a-zA-Z0-9]+]], 3
 ; CHECK:   %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
 ; CHECK:   %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
@@ -579,7 +579,7 @@ for.body:
 ; }
 
 ; CHECK-LABEL: @PR27626_0(
-; CHECK: min.iters.checked:
+; CHECK: vector.ph:
 ; CHECK:   %n.mod.vf = and i64 %[[N:.+]], 3
 ; CHECK:   %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
 ; CHECK:   %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
@@ -627,7 +627,7 @@ for.end:
 ; }
 
 ; CHECK-LABEL: @PR27626_1(
-; CHECK: min.iters.checked:
+; CHECK: vector.ph:
 ; CHECK:   %n.mod.vf = and i64 %[[N:.+]], 3
 ; CHECK:   %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
 ; CHECK:   %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
@@ -680,7 +680,7 @@ for.end:
 ; }
 
 ; CHECK-LABEL: @PR27626_2(
-; CHECK: min.iters.checked:
+; CHECK: vector.ph:
 ; CHECK:   %n.mod.vf = and i64 %[[N:.+]], 3
 ; CHECK:   %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
 ; CHECK:   %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
@@ -728,7 +728,7 @@ for.end:
 ; }
 
 ; CHECK-LABEL: @PR27626_3(
-; CHECK: min.iters.checked:
+; CHECK: vector.ph:
 ; CHECK:   %n.mod.vf = and i64 %[[N:.+]], 3
 ; CHECK:   %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
 ; CHECK:   %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf

Modified: llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll?rev=308421&r1=308420&r2=308421&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll Tue Jul 18 22:16:39 2017
@@ -135,7 +135,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @PR30742
-; CHECK: min.iters.checked
+; CHECK: vector.ph
 ; CHECK:   %[[N_MOD_VF:.+]] = urem i32 %[[T5:.+]], 2
 ; CHECK:   %[[N_VEC:.+]] = sub i32 %[[T5]], %[[N_MOD_VF]]
 ; CHECK: middle.block

Modified: llvm/trunk/test/Transforms/LoopVectorize/miniters.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/miniters.ll?rev=308421&r1=308420&r2=308421&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/miniters.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/miniters.ll Tue Jul 18 22:16:39 2017
@@ -10,10 +10,10 @@ target datalayout = "e-m:e-i64:64-f80:12
 ; Generate min.iters.check to skip the vector loop and jump to scalar.ph directly when loop iteration number is less than VF * UF.
 ; CHECK-LABEL: foo(
 ; CHECK: %min.iters.check = icmp ult i64 %N, 4
-; CHECK: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
+; CHECK: br i1 %min.iters.check, label %scalar.ph, label %vector.ph
 ; UNROLL-LABEL: foo(
 ; UNROLL: %min.iters.check = icmp ult i64 %N, 8
-; UNROLL: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
+; UNROLL: br i1 %min.iters.check, label %scalar.ph, label %vector.ph
 
 define void @foo(i64 %N) {
 entry:

Modified: llvm/trunk/test/Transforms/LoopVectorize/runtime-check-readonly.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/runtime-check-readonly.ll?rev=308421&r1=308420&r2=308421&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/runtime-check-readonly.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/runtime-check-readonly.ll Tue Jul 18 22:16:39 2017
@@ -4,7 +4,6 @@ target datalayout = "e-p:64:64:64-i1:8:8
 
 ;CHECK-LABEL: @add_ints(
 ;CHECK: br
-;CHECK: br
 ;CHECK: getelementptr
 ;CHECK-DAG: getelementptr
 ;CHECK-DAG: icmp ugt

Modified: llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll?rev=308421&r1=308420&r2=308421&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/runtime-check.ll Tue Jul 18 22:16:39 2017
@@ -10,7 +10,7 @@ target datalayout = "e-p:64:64:64-i1:8:8
 
 ;CHECK-LABEL: define i32 @foo
 ;CHECK: for.body.preheader:
-;CHECK: br i1 %cmp.zero, label %scalar.ph, label %vector.memcheck, !dbg [[BODY_LOC:![0-9]+]]
+;CHECK: br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck, !dbg [[BODY_LOC:![0-9]+]]
 ;CHECK: vector.memcheck:
 ;CHECK: br i1 %memcheck.conflict, label %scalar.ph, label %vector.ph, !dbg [[BODY_LOC]]
 ;CHECK: load <4 x float>