[PATCH] D22869: [LV] Generate both scalar and vector integer induction variables
Matthew Simpson via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 1 11:20:54 PDT 2016
mssimpso added inline comments.
================
Comment at: test/Transforms/LoopVectorize/induction.ll:604-651
@@ -539,41 +603,49 @@
-; IND-LABEL: nonprimary
-; IND-LABEL: vector.ph
-; IND: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
-; IND: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
-; IND: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 42>
-; IND-LABEL: vector.body:
-; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; IND: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
-; IND: %index.next = add i32 %index, 2
-; IND: %vec.ind.next = add <2 x i32> %vec.ind, <i32 84, i32 84>
-; IND: %[[CMP:.*]] = icmp eq i32 %index.next
-; IND: br i1 %[[CMP]]
-; UNROLL-LABEL: nonprimary
-; UNROLL-LABEL: vector.ph
-; UNROLL: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
-; UNROLL: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
-; UNROLL: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 42>
-; UNROLL-LABEL: vector.body:
-; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; UNROLL: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
-; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 84, i32 84>
-; UNROLL: %index.next = add i32 %index, 4
-; UNROLL: %vec.ind.next = add <2 x i32> %vec.ind, <i32 168, i32 168>
-; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next
-; UNROLL: br i1 %[[CMP]]
+; IND-LABEL: @nonprimary(
+; IND: vector.ph:
+; IND: br label %vector.body
+; IND: vector.body:
+; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; IND: %[[m1:.+]] = mul i32 %index, 42
+; IND: %offset.idx = add i32 %[[m1]], %i
+; IND: %[[a1:.+]] = add i32 %9, %i
+; IND: %[[a2:.+]] = add i32 %offset.idx, 42
+; IND: %[[s1:.+]] = sext i32 %[[a1]] to i64
+; IND: getelementptr inbounds i32, i32* %a, i64 %[[s1]]
+; IND: %[[s2:.+]] = sext i32 %[[a2]] to i64
+; IND: getelementptr inbounds i32, i32* %a, i64 %[[s2]]
+;
+; UNROLL-LABEL: @nonprimary(
+; UNROLL: vector.ph:
+; UNROLL: br label %vector.body
+; UNROLL: vector.body:
+; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; UNROLL: %[[m1:.+]] = mul i32 %index, 42
+; UNROLL: %offset.idx = add i32 %[[m1]], %i
+; UNROLL: %[[a1:.+]] = add i32 %[[m1]], %i
+; UNROLL: %[[a2:.+]] = add i32 %offset.idx, 42
+; UNROLL: %[[a3:.+]] = add i32 %offset.idx, 84
+; UNROLL: %[[a4:.+]] = add i32 %offset.idx, 126
+; UNROLL: %[[s1:.+]] = sext i32 %[[a1]] to i64
+; UNROLL: getelementptr inbounds i32, i32* %a, i64 %[[s1]]
+; UNROLL: %[[s2:.+]] = sext i32 %[[a2]] to i64
+; UNROLL: getelementptr inbounds i32, i32* %a, i64 %[[s2]]
+; UNROLL: %[[s3:.+]] = sext i32 %[[a3]] to i64
+; UNROLL: getelementptr inbounds i32, i32* %a, i64 %[[s3]]
+; UNROLL: %[[s4:.+]] = sext i32 %[[a4]] to i64
+; UNROLL: getelementptr inbounds i32, i32* %a, i64 %[[s4]]
define void @nonprimary(i32* nocapture %a, i32 %start, i32 %i, i32 %k) {
for.body.preheader:
br label %for.body
for.body:
%indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ %i, %for.body.preheader ]
%arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
store i32 %indvars.iv, i32* %arrayidx, align 4
%indvars.iv.next = add nuw nsw i32 %indvars.iv, 42
%exitcond = icmp eq i32 %indvars.iv.next, %k
br i1 %exitcond, label %exit, label %for.body
exit:
ret void
}
----------------
anemet wrote:
> I could be wrong, but now this test does not seem to test what it was meant for. I thought the point was to ensure that most of the work to get the vector IV set up is pushed into the preheader. But now it seems that we no longer generate that?
Yeah, I think you're right. With the current patch, the vector IV is completely removed after instcombine. We generate both a scalar one and a vector one (because of the store) during vectorization. But because the store is scalarized, instcombine is able to remove the vector IV.
If we add a pre-instcombine check for this test, we could check the original functionality as well. What do you think?
https://reviews.llvm.org/D22869
More information about the llvm-commits
mailing list