[llvm] r245952 - The patch replace the overflow check in loop vectorization with the minimum loop iterations check.

Wei Mi via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 25 09:43:48 PDT 2015


Author: wmi
Date: Tue Aug 25 11:43:47 2015
New Revision: 245952

URL: http://llvm.org/viewvc/llvm-project?rev=245952&view=rev
Log:
The patch replace the overflow check in loop vectorization with the minimum loop iterations check.

The loop minimum iterations check below ensures the loop has enough trip count so the generated
vector loop will likely be executed, and it covers the overflow check.

Differential Revision: http://reviews.llvm.org/D12107.

Added:
    llvm/trunk/test/Transforms/LoopVectorize/miniters.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/trunk/test/Transforms/LoopVectorize/induction.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=245952&r1=245951&r2=245952&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Tue Aug 25 11:43:47 2015
@@ -2619,7 +2619,7 @@ void InnerLoopVectorizer::createEmptyLoo
    the vectorized instructions while the old loop will continue to run the
    scalar remainder.
 
-       [ ] <-- Back-edge taken count overflow check.
+       [ ] <-- loop iteration number check.
     /   |
    /    v
   |    [ ] <-- vector loop bypass (may consist of multiple blocks).
@@ -2683,21 +2683,25 @@ void InnerLoopVectorizer::createEmptyLoo
   // Notice that the pre-header does not change, only the loop body.
   SCEVExpander Exp(*SE, DL, "induction");
 
-  // We need to test whether the backedge-taken count is uint##_max. Adding one
-  // to it will cause overflow and an incorrect loop trip count in the vector
-  // body. In case of overflow we want to directly jump to the scalar remainder
-  // loop.
-  Value *BackedgeCount =
-      Exp.expandCodeFor(BackedgeTakeCount, BackedgeTakeCount->getType(),
-                        VectorPH->getTerminator());
-  if (BackedgeCount->getType()->isPointerTy())
-    BackedgeCount = CastInst::CreatePointerCast(BackedgeCount, IdxTy,
-                                                "backedge.ptrcnt.to.int",
-                                                VectorPH->getTerminator());
-  Instruction *CheckBCOverflow =
-      CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, BackedgeCount,
-                      Constant::getAllOnesValue(BackedgeCount->getType()),
-                      "backedge.overflow", VectorPH->getTerminator());
+  // The loop minimum iterations check below is to ensure the loop has enough
+  // trip count so the generated vector loop will likely be executed and the
+  // preparation and rounding-off costs will likely be worthy.
+  //
+  // The minimum iteration check also covers case where the backedge-taken
+  // count is uint##_max.  Adding one to it will cause overflow and an
+  // incorrect loop trip count being generated in the vector body. In this
+  // case we also want to directly jump to the scalar remainder loop.
+  Value *ExitCountValue = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
+                                            VectorPH->getTerminator());
+  if (ExitCountValue->getType()->isPointerTy())
+    ExitCountValue = CastInst::CreatePointerCast(ExitCountValue, IdxTy,
+                                                 "exitcount.ptrcnt.to.int",
+                                                 VectorPH->getTerminator());
+
+  Instruction *CheckMinIters =
+      CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, ExitCountValue,
+                      ConstantInt::get(ExitCountValue->getType(), VF * UF),
+                      "min.iters.check", VectorPH->getTerminator());
 
   // The loop index does not have to start at Zero. Find the original start
   // value from the induction PHI node. If we don't have an induction variable
@@ -2749,15 +2753,14 @@ void InnerLoopVectorizer::createEmptyLoo
   // times the unroll factor (num of SIMD instructions).
   Constant *Step = ConstantInt::get(IdxTy, VF * UF);
 
-  // Generate code to check that the loop's trip count that we computed by
-  // adding one to the backedge-taken count will not overflow.
+  // Generate code to check that the loop's trip count is not less than the
+  // minimum loop iteration number threshold.
   BasicBlock *NewVectorPH =
-      VectorPH->splitBasicBlock(VectorPH->getTerminator(), "overflow.checked");
+      VectorPH->splitBasicBlock(VectorPH->getTerminator(), "min.iters.checked");
   if (ParentLoop)
     ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI);
-  ReplaceInstWithInst(
-      VectorPH->getTerminator(),
-      BranchInst::Create(ScalarPH, NewVectorPH, CheckBCOverflow));
+  ReplaceInstWithInst(VectorPH->getTerminator(),
+                      BranchInst::Create(ScalarPH, NewVectorPH, CheckMinIters));
   VectorPH = NewVectorPH;
 
   // This is the IR builder that we use to add all of the logic for bypassing

Modified: llvm/trunk/test/Transforms/LoopVectorize/induction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/induction.ll?rev=245952&r1=245951&r2=245952&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/induction.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/induction.ll Tue Aug 25 11:43:47 2015
@@ -113,8 +113,8 @@ define i32 @i16_loop() nounwind readnone
 ; condition and branch directly to the scalar loop.
 
 ; CHECK-LABEL: max_i32_backedgetaken
-; CHECK:  %backedge.overflow = icmp eq i32 -1, -1
-; CHECK:  br i1 %backedge.overflow, label %scalar.ph, label %overflow.checked
+; CHECK:  %min.iters.check = icmp ult i32 0, 2
+; CHECK:  br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
 
 ; CHECK: scalar.ph:
 ; CHECK:  %bc.resume.val = phi i32 [ %resume.val, %middle.block ], [ 0, %0 ]

Added: llvm/trunk/test/Transforms/LoopVectorize/miniters.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/miniters.ll?rev=245952&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/miniters.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/miniters.ll Tue Aug 25 11:43:47 2015
@@ -0,0 +1,45 @@
+; RUN: opt %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
+; RUN: opt %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck %s -check-prefix=UNROLL
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at b = common global [1000 x i32] zeroinitializer, align 16
+ at c = common global [1000 x i32] zeroinitializer, align 16
+ at a = common global [1000 x i32] zeroinitializer, align 16
+
+; Generate min.iters.check to skip the vector loop and jump to scalar.ph directly when loop iteration number is less than VF * UF.
+; CHECK-LABEL: foo(
+; CHECK: %min.iters.check = icmp ult i64 %N, 4
+; CHECK: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
+; UNROLL-LABEL: foo(
+; UNROLL: %min.iters.check = icmp ult i64 %N, 8
+; UNROLL: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
+
+define void @foo(i64 %N) {
+entry:
+  %cmp.8 = icmp sgt i64 %N, 0
+  br i1 %cmp.8, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.preheader
+  %i.09 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* @b, i64 0, i64 %i.09
+  %tmp = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds [1000 x i32], [1000 x i32]* @c, i64 0, i64 %i.09
+  %tmp1 = load i32, i32* %arrayidx1, align 4
+  %add = add nsw i32 %tmp1, %tmp
+  %arrayidx2 = getelementptr inbounds [1000 x i32], [1000 x i32]* @a, i64 0, i64 %i.09
+  store i32 %add, i32* %arrayidx2, align 4
+  %inc = add nuw nsw i64 %i.09, 1
+  %exitcond = icmp eq i64 %inc, %N
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}




More information about the llvm-commits mailing list