[llvm] f8ba90d - [LV] Add test case that was supposed to go with D67948

Fri Nov 1 02:54:33 PDT 2019

Hello Craig,

It looks like this test is failing on all of the Arm and AArch64
buildbots, as well as on the Hexagon buildbot.

First failure: http://lab.llvm.org:8011/builders/clang-cmake-armv7-quick/builds/11215
http://lab.llvm.org:8011/builders/clang-hexagon-elf/builds/27318

Can you take a look?

I ran the test on a local Arm machine, and it is the third of the
three RUN lines that fails:
error: CHECK: expected string not found in input
; CHECK: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
         ^
<stdin>:37:2: note: scanning from here
 %vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
 ^
<stdin>:37:2: note: with "INDEX_NEXT" equal to "%index\\.next"
 %vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
 ^
<stdin>:37:2: note: with "INDEX" equal to "%index"
 %vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
 ^
<stdin>:39:3: note: possible intended match here
 %0 = add i64 %index, 0
  ^

The output on the machine is below (note that %index.next advances by 2,
not by the 4 that the CHECK line expects):
; ModuleID = '<stdin>'
source_filename = "<stdin>"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@a = dso_local global [5 x i32] zeroinitializer, align 16
@b = dso_local global [5 x i32] zeroinitializer, align 16

define dso_local void @_Z3fooi(i32 %M) local_unnamed_addr {
entry:
  %cmp8 = icmp sgt i32 %M, 0
  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %M to i64
  %min.iters.check = icmp ult i64 %wide.trip.count, 2
  br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck

vector.memcheck:                                  ; preds = %for.body.preheader
  %scevgep = getelementptr [5 x i32], [5 x i32]* @a, i64 0, i64 %wide.trip.count
  %scevgep1 = bitcast i32* %scevgep to i8*
  %scevgep2 = getelementptr [5 x i32], [5 x i32]* @b, i64 0, i64 %wide.trip.count
  %scevgep23 = bitcast i32* %scevgep2 to i8*
  %bound0 = icmp ult i8* bitcast ([5 x i32]* @a to i8*), %scevgep23
  %bound1 = icmp ult i8* bitcast ([5 x i32]* @b to i8*), %scevgep1
  %found.conflict = and i1 %bound0, %bound1
  %memcheck.conflict = and i1 %found.conflict, true
  br i1 %memcheck.conflict, label %scalar.ph, label %vector.ph

vector.ph:                                        ; preds = %vector.memcheck
  %n.mod.vf = urem i64 %wide.trip.count, 2
  %n.vec = sub i64 %wide.trip.count, %n.mod.vf
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %vec.ind4 = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next5, %vector.body ]
  %0 = add i64 %index, 0
  %1 = add i64 %index, 1
  %2 = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 %0
  %3 = getelementptr inbounds i32, i32* %2, i32 0
  %4 = bitcast i32* %3 to <2 x i32>*
  %wide.load = load <2 x i32>, <2 x i32>* %4, align 4, !alias.scope !0
  %5 = mul nsw <2 x i32> %wide.load, %vec.ind4
  %6 = getelementptr inbounds [5 x i32], [5 x i32]* @a, i64 0, i64 %0
  %7 = getelementptr inbounds i32, i32* %6, i32 0
  %8 = bitcast i32* %7 to <2 x i32>*
  %wide.load6 = load <2 x i32>, <2 x i32>* %8, align 4, !alias.scope !3, !noalias !0
  %9 = add nsw <2 x i32> %wide.load6, %5
  %10 = bitcast i32* %7 to <2 x i32>*
  store <2 x i32> %9, <2 x i32>* %10, align 4, !alias.scope !3, !noalias !0
  %index.next = add i64 %index, 2
  %vec.ind.next = add <2 x i64> %vec.ind, <i64 2, i64 2>
  %vec.ind.next5 = add <2 x i32> %vec.ind4, <i32 2, i32 2>
  %11 = icmp eq i64 %index.next, %n.vec
  br i1 %11, label %middle.block, label %vector.body, !llvm.loop !5

middle.block:                                     ; preds = %vector.body
  %cmp.n = icmp eq i64 %wide.trip.count, %n.vec
  br i1 %cmp.n, label %for.cond.cleanup.loopexit, label %scalar.ph

scalar.ph:                                        ; preds = %middle.block, %vector.memcheck, %for.body.preheader
  %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %for.body.preheader ], [ 0, %vector.memcheck ]
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %middle.block, %for.body
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  ret void

for.body:                                         ; preds = %for.body, %scalar.ph
  %indvars.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 %indvars.iv
  %12 = load i32, i32* %arrayidx, align 4
  %13 = trunc i64 %indvars.iv to i32
  %mul = mul nsw i32 %12, %13
  %arrayidx2 = getelementptr inbounds [5 x i32], [5 x i32]* @a, i64 0, i64 %indvars.iv
  %14 = load i32, i32* %arrayidx2, align 4
  %add = add nsw i32 %14, %mul
  store i32 %add, i32* %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !prof !7, !llvm.loop !8
}

!0 = !{!1}
!1 = distinct !{!1, !2}
!2 = distinct !{!2, !"LVerDomain"}
!3 = !{!4}
!4 = distinct !{!4, !2}
!5 = distinct !{!5, !6}
!6 = !{!"llvm.loop.isvectorized", i32 1}
!7 = !{!"branch_weights", i32 1, i32 5}
!8 = distinct !{!8, !6}

On Thu, 31 Oct 2019 at 22:15, Craig Topper via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
>
>
> Author: Craig Topper
> Date: 2019-10-31T15:11:26-07:00
> New Revision: f8ba90d448c6703809340b3fdb90e733b6bc2d33
>
> URL: https://github.com/llvm/llvm-project/commit/f8ba90d448c6703809340b3fdb90e733b6bc2d33
> DIFF: https://github.com/llvm/llvm-project/commit/f8ba90d448c6703809340b3fdb90e733b6bc2d33.diff
>
> LOG: [LV] Add test case that was supposed to go with D67948
>
> I forgot to git add it when I committed for Evgeniy.
>
> Added:
>     llvm/test/Transforms/LoopVectorize/interleave_short_tc.ll
>
> Modified:
>
>
> Removed:
>
>
>
> ################################################################################
> diff  --git a/llvm/test/Transforms/LoopVectorize/interleave_short_tc.ll b/llvm/test/Transforms/LoopVectorize/interleave_short_tc.ll
> new file mode 100644
> index 000000000000..78e69cb2d647
> --- /dev/null
> +++ b/llvm/test/Transforms/LoopVectorize/interleave_short_tc.ll
> @@ -0,0 +1,59 @@
> +; Check that we won't interleave by more than "best known" estimated trip count.
> +
> +; The loop is expected to be vectorized by 4 and interleaving suppressed due to
> +; short trip count which is controlled by "tiny-trip-count-interleave-threshold".
> +; RUN: opt  -passes=loop-vectorize -force-vector-width=4 -vectorizer-min-trip-count=4 -S < %s |  FileCheck %s
> +;
> +; The loop is expected to be vectorized by 4 and computed interleaving factor is 1.
> +; Thus the resulting step is 4.
> +; RUN: opt  -passes=loop-vectorize -force-vector-width=4 -vectorizer-min-trip-count=4 -tiny-trip-count-interleave-threshold=4 -S < %s |  FileCheck %s
> +
> +; The loop is expected to be vectorized by 2 and computed interleaving factor is 2.
> +; Thus the resulting step is 4.
> +; RUN: opt  -passes=loop-vectorize -force-vector-width=2 -vectorizer-min-trip-count=4 -tiny-trip-count-interleave-threshold=4 -S < %s |  FileCheck %s
> +
> +; Check that we won't interleave by more than "best known" estimated trip count.
> +
> +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
> +target triple = "x86_64-unknown-linux-gnu"
> +
> +@a = dso_local global [5 x i32] zeroinitializer, align 16
> +@b = dso_local global [5 x i32] zeroinitializer, align 16
> +
> +; Function Attrs: nofree norecurse nounwind uwtable
> +define dso_local void @_Z3fooi(i32 %M) local_unnamed_addr {
> +; CHECK-LABEL: @_Z3fooi(
> +; CHECK:       [[VECTOR_BODY:vector\.body]]:
> +; CHECK:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
> +; CHECK:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
> +;
> +entry:
> +  %cmp8 = icmp sgt i32 %M, 0
> +  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
> +
> +for.body.preheader:                               ; preds = %entry
> +  %wide.trip.count = zext i32 %M to i64
> +  br label %for.body
> +
> +for.cond.cleanup.loopexit:                        ; preds = %for.body
> +  br label %for.cond.cleanup
> +
> +for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
> +  ret void
> +
> +for.body:                                         ; preds = %for.body, %for.body.preheader
> +  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
> +  %arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 %indvars.iv
> +  %0 = load i32, i32* %arrayidx, align 4
> +  %1 = trunc i64 %indvars.iv to i32
> +  %mul = mul nsw i32 %0, %1
> +  %arrayidx2 = getelementptr inbounds [5 x i32], [5 x i32]* @a, i64 0, i64 %indvars.iv
> +  %2 = load i32, i32* %arrayidx2, align 4
> +  %add = add nsw i32 %2, %mul
> +  store i32 %add, i32* %arrayidx2, align 4
> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
> +  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
> +  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !prof !1
> +}
> +
> +!1 = !{!"branch_weights", i32 1, i32 5}
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits