[llvm] r276339 - [LV] Move vector int induction update to end of latch

Thu Jul 21 14:20:15 PDT 2016

Author: mssimpso
Date: Thu Jul 21 16:20:15 2016
New Revision: 276339

URL: http://llvm.org/viewvc/llvm-project?rev=276339&view=rev
Log:
[LV] Move vector int induction update to end of latch

This patch moves the update instruction for vectorized integer induction phi
nodes to the end of the latch block. This ensures consistent placement of all
induction updates across all the kinds of int inductions we create (scalar,
splat vector, or vector phi).

Differential Revision: https://reviews.llvm.org/D22416

Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/trunk/test/Transforms/LoopVectorize/X86/scatter_crash.ll
    llvm/trunk/test/Transforms/LoopVectorize/induction.ll
    llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=276339&r1=276338&r2=276339&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Thu Jul 21 16:20:15 2016
@@ -1910,14 +1910,23 @@ void InnerLoopVectorizer::createVectorIn
   // factor. The last of those goes into the PHI.
   PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind",
                                     &*LoopVectorBody->getFirstInsertionPt());
-  Value *LastInduction = VecInd;
+  Instruction *LastInduction = VecInd;
   for (unsigned Part = 0; Part < UF; ++Part) {
     Entry[Part] = LastInduction;
-    LastInduction = Builder.CreateAdd(LastInduction, SplatVF, "step.add");
+    LastInduction = cast<Instruction>(
+        Builder.CreateAdd(LastInduction, SplatVF, "step.add"));
   }
 
+  // Move the last step to the end of the latch block. This ensures consistent
+  // placement of all induction updates.
+  auto *LoopVectorLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch();
+  auto *Br = cast<BranchInst>(LoopVectorLatch->getTerminator());
+  auto *ICmp = cast<Instruction>(Br->getCondition());
+  LastInduction->moveBefore(ICmp);
+  LastInduction->setName("vec.ind.next");
+
   VecInd->addIncoming(SteppedStart, LoopVectorPreHeader);
-  VecInd->addIncoming(LastInduction, LoopVectorBody);
+  VecInd->addIncoming(LastInduction, LoopVectorLatch);
 }
 
 void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry,

Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/scatter_crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/scatter_crash.ll?rev=276339&r1=276338&r2=276339&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/scatter_crash.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/scatter_crash.ll Thu Jul 21 16:20:15 2016
@@ -19,8 +19,6 @@ define void @_Z3fn1v() #0 {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX:%.*]].next, %vector.body ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ 
 ; CHECK-NEXT:    [[VEC_IND3:%.*]] = phi <16 x i64> [ 
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-; CHECK-NEXT:    [[STEP_ADD4:%.*]] = add <16 x i64> [[VEC_IND3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
 ; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw <16 x i64> <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>, [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP11]]
@@ -137,6 +135,8 @@ define void @_Z3fn1v() #0 {
 ; CHECK-NEXT:    [[TMP123:%.*]] = insertelement <16 x i32*> [[TMP119]], i32* [[TMP122]], i32 15
 ; CHECK-NEXT:    [[VECTORGEP:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP58]], <16 x i64> [[TMP59]], i64 0
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[VECTORGEP]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK:         [[STEP_ADD:%.*]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK:         [[STEP_ADD4:%.*]] = add <16 x i64> [[VEC_IND3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
 entry:
   %0 = load i32, i32* @c, align 4
   %cmp34 = icmp sgt i32 %0, 8

Modified: llvm/trunk/test/Transforms/LoopVectorize/induction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/induction.ll?rev=276339&r1=276338&r2=276339&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/induction.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/induction.ll Thu Jul 21 16:20:15 2016
@@ -437,18 +437,18 @@ entry:
 ; IND-LABEL: veciv
 ; IND: vector.body:
 ; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; IND: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %step.add, %vector.body ]
-; IND: %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2>
+; IND: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
 ; IND: %index.next = add i32 %index, 2
+; IND: %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2>
 ; IND: %[[CMP:.*]] = icmp eq i32 %index.next
 ; IND: br i1 %[[CMP]]
 ; UNROLL-LABEL: veciv
 ; UNROLL: vector.body:
 ; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; UNROLL: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %step.add1, %vector.body ]
+; UNROLL: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
 ; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2>
-; UNROLL: %step.add1 = add <2 x i32> %vec.ind, <i32 4, i32 4>
 ; UNROLL: %index.next = add i32 %index, 4
+; UNROLL: %vec.ind.next = add <2 x i32> %vec.ind, <i32 4, i32 4>
 ; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next
 ; UNROLL: br i1 %[[CMP]]
 define void @veciv(i32* nocapture %a, i32 %start, i32 %k) {
@@ -471,8 +471,8 @@ exit:
 ; IND: vector.body:
 ; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 ; IND: %[[VECIND:.*]] = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %[[STEPADD:.*]], %vector.body ]
-; IND: %[[STEPADD]] = add <2 x i32> %[[VECIND]], <i32 2, i32 2>
 ; IND: %index.next = add i64 %index, 2
+; IND: %[[STEPADD]] = add <2 x i32> %[[VECIND]], <i32 2, i32 2>
 ; IND: %[[CMP:.*]] = icmp eq i64 %index.next
 ; IND: br i1 %[[CMP]]
 define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
@@ -499,9 +499,9 @@ exit:
 ; IND: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 42>
 ; IND-LABEL: vector.body:
 ; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; IND: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %step.add, %vector.body ]
-; IND: %step.add = add <2 x i32> %vec.ind, <i32 84, i32 84>
+; IND: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
 ; IND: %index.next = add i32 %index, 2
+; IND: %vec.ind.next = add <2 x i32> %vec.ind, <i32 84, i32 84>
 ; IND: %[[CMP:.*]] = icmp eq i32 %index.next
 ; IND: br i1 %[[CMP]]
 ; UNROLL-LABEL: nonprimary
@@ -511,10 +511,10 @@ exit:
 ; UNROLL: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 42>
 ; UNROLL-LABEL: vector.body:
 ; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; UNROLL: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %step.add1, %vector.body ]
+; UNROLL: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
 ; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 84, i32 84>
-; UNROLL: %step.add1 = add <2 x i32> %vec.ind, <i32 168, i32 168>
 ; UNROLL: %index.next = add i32 %index, 4
+; UNROLL: %vec.ind.next = add <2 x i32> %vec.ind, <i32 168, i32 168>
 ; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next
 ; UNROLL: br i1 %[[CMP]]
 define void @nonprimary(i32* nocapture %a, i32 %start, i32 %i, i32 %k) {

Modified: llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll?rev=276339&r1=276338&r2=276339&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll Thu Jul 21 16:20:15 2016
@@ -7,10 +7,11 @@ target triple = "x86_64-apple-macosx10.8
 
 ;CHECK-LABEL: @array_at_plus_one(
 ;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-;CHECK: %vec.ind = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %step.add, %vector.body ]
-;CHECK: %vec.ind1 = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %step.add2, %vector.body ]
-;CHECK: add <4 x i64> %vec.ind, <i64 4, i64 4, i64 4, i64 4>
+;CHECK: %vec.ind = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %vec.ind.next, %vector.body ]
+;CHECK: %vec.ind1 = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next2, %vector.body ]
 ;CHECK: add nsw <4 x i64> %vec.ind, <i64 12, i64 12, i64 12, i64 12>
+;CHECK: %vec.ind.next = add <4 x i64> %vec.ind, <i64 4, i64 4, i64 4, i64 4>
+;CHECK: %vec.ind.next2 = add <4 x i32> %vec.ind1, <i32 4, i32 4, i32 4, i32 4>
 ;CHECK: ret i32
 define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
   %1 = icmp sgt i32 %n, 0