[PATCH] D22869: [LV] Generate both scalar and vector integer induction variables

Mon Aug 1 11:34:24 PDT 2016

anemet added inline comments.

================
Comment at: test/Transforms/LoopVectorize/induction.ll:604-651
@@ -539,41 +603,49 @@
 
-; IND-LABEL: nonprimary
-; IND-LABEL: vector.ph
-; IND: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
-; IND: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
-; IND: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 42>
-; IND-LABEL: vector.body:
-; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; IND: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
-; IND: %index.next = add i32 %index, 2
-; IND: %vec.ind.next = add <2 x i32> %vec.ind, <i32 84, i32 84>
-; IND: %[[CMP:.*]] = icmp eq i32 %index.next
-; IND: br i1 %[[CMP]]
-; UNROLL-LABEL: nonprimary
-; UNROLL-LABEL: vector.ph
-; UNROLL: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
-; UNROLL: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
-; UNROLL: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 42>
-; UNROLL-LABEL: vector.body:
-; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; UNROLL: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
-; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 84, i32 84>
-; UNROLL: %index.next = add i32 %index, 4
-; UNROLL: %vec.ind.next = add <2 x i32> %vec.ind, <i32 168, i32 168>
-; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next
-; UNROLL: br i1 %[[CMP]]
+; IND-LABEL: @nonprimary(
+; IND: vector.ph:
+; IND:   br label %vector.body
+; IND: vector.body:
+; IND:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; IND:   %[[m1:.+]] = mul i32 %index, 42
+; IND:   %offset.idx = add i32 %[[m1]], %i
+; IND:   %[[a1:.+]] = add i32 %9, %i
+; IND:   %[[a2:.+]] = add i32 %offset.idx, 42
+; IND:   %[[s1:.+]] = sext i32 %[[a1]] to i64
+; IND:   getelementptr inbounds i32, i32* %a, i64 %[[s1]]
+; IND:   %[[s2:.+]] = sext i32 %[[a2]] to i64
+; IND:   getelementptr inbounds i32, i32* %a, i64 %[[s2]]
+;
+; UNROLL-LABEL: @nonprimary(
+; UNROLL: vector.ph:
+; UNROLL:   br label %vector.body
+; UNROLL: vector.body:
+; UNROLL:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; UNROLL:   %[[m1:.+]] = mul i32 %index, 42
+; UNROLL:   %offset.idx = add i32 %[[m1]], %i
+; UNROLL:   %[[a1:.+]] = add i32 %[[m1]], %i
+; UNROLL:   %[[a2:.+]] = add i32 %offset.idx, 42
+; UNROLL:   %[[a3:.+]] = add i32 %offset.idx, 84
+; UNROLL:   %[[a4:.+]] = add i32 %offset.idx, 126
+; UNROLL:   %[[s1:.+]] = sext i32 %[[a1]] to i64
+; UNROLL:   getelementptr inbounds i32, i32* %a, i64 %[[s1]]
+; UNROLL:   %[[s2:.+]] = sext i32 %[[a2]] to i64
+; UNROLL:   getelementptr inbounds i32, i32* %a, i64 %[[s2]]
+; UNROLL:   %[[s3:.+]] = sext i32 %[[a3]] to i64
+; UNROLL:   getelementptr inbounds i32, i32* %a, i64 %[[s3]]
+; UNROLL:   %[[s4:.+]] = sext i32 %[[a4]] to i64
+; UNROLL:   getelementptr inbounds i32, i32* %a, i64 %[[s4]]
 define void @nonprimary(i32* nocapture %a, i32 %start, i32 %i, i32 %k) {
 for.body.preheader:
   br label %for.body
 
 for.body:
   %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ %i, %for.body.preheader ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
   store i32 %indvars.iv, i32* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i32 %indvars.iv, 42
   %exitcond = icmp eq i32 %indvars.iv.next, %k
   br i1 %exitcond, label %exit, label %for.body
 
 exit:
   ret void
 }
----------------
mssimpso wrote:
> anemet wrote:
> > I could be wrong, but now this test does not seem to test what it was meant for.  I thought the point was to ensure that most of the work to get the vector IV set up is pushed into the preheader.  But now it seems that we no longer generate that?
> Yeah, I think you're right. With the current patch, the vector IV is completely removed after instcombine. We generate both a scalar one and a vector one (because of the store) during vectorization. But because the store is scalarized, instcombine is able to remove the vector IV.
> 
> If we add a pre-instcombine check for this test, we could check the original functionality as well. What do you think?
Ah, I didn't see that this was a non-consecutive store.  What if you make it consecutive to keep the store from being scalarized (hopefully the non-zero-based IV would still require us to create a new IV)?


https://reviews.llvm.org/D22869