[llvm] e3a1330 - [NFC] Add tests for scalable vectorization of loops with large stride accesses

David Sherwood via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 1 02:26:34 PDT 2021


Author: David Sherwood
Date: 2021-04-01T10:25:06+01:00
New Revision: e3a13304fc036f25eacee9c084294d3b80f7c5bb

URL: https://github.com/llvm/llvm-project/commit/e3a13304fc036f25eacee9c084294d3b80f7c5bb
DIFF: https://github.com/llvm/llvm-project/commit/e3a13304fc036f25eacee9c084294d3b80f7c5bb.diff

LOG: [NFC] Add tests for scalable vectorization of loops with large stride accesses

This patch just adds tests showing that we can vectorize loops such as these:

  for (i = 0; i < n; i++)
    dst[i * 7] += 1;

and

  for (i = 0; i < n; i++)
    if (cond[i])
      dst[i * 7] += 1;

using scalable vectors, where we expect the vectorized loop to use gathers
and scatters. The vector of pointers used for the gather is identical to
the one used for the scatter, so there should be no memory dependences.
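
For reference, the vectorized loop body being checked for has roughly this
shape (a hand-written sketch for the i32 case, not output from the test;
the value names and the mask are illustrative, and for the unconditional
loops the mask is all-true):

  ; Lane addresses: &dst[7*i], &dst[7*(i+1)], ...
  %ptrs = getelementptr inbounds i32, i32* %dst, <vscale x 4 x i64> %indices
  ; Gather the current values, add the splatted constant, scatter them back.
  %vals = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
  %add = add nsw <vscale x 4 x i32> %vals, %splat
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %add, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mask)

Since %ptrs feeds both intrinsics, each scatter lane writes exactly the
address that the corresponding gather lane read.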

Tests are added here:

  Transforms/LoopVectorize/AArch64/sve-large-strides.ll

Differential Revision: https://reviews.llvm.org/D99192

Added: 
    llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll

Modified: 
    

Removed: 
    


################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
new file mode 100644
index 0000000000000..e804dd3fe3daa
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
@@ -0,0 +1,101 @@
+; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -dce -instcombine -S <%s | FileCheck %s
+
+define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) {
+; CHECK-LABEL: @stride7_i32(
+; CHECK:      vector.body
+; CHECK:        %[[VEC_IND:.*]] = phi <vscale x 4 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
+; CHECK-NEXT:   %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 4 x i64> %[[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 7, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:   %[[PTRS:.*]] = getelementptr inbounds i32, i32* %dst, <vscale x 4 x i64> %[[PTR_INDICES]]
+; CHECK-NEXT:   %[[GLOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %[[PTRS]]
+; CHECK-NEXT:   %[[VALS:.*]] = add nsw <vscale x 4 x i32> %[[GLOAD]],
+; CHECK-NEXT:   call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %[[VALS]], <vscale x 4 x i32*> %[[PTRS]]
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %mul = mul nuw nsw i64 %i.05, 7
+  %arrayidx = getelementptr inbounds i32, i32* %dst, i64 %mul
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, 3
+  store i32 %add, i32* %arrayidx, align 4
+  %inc = add nuw nsw i64 %i.05, 1
+  %exitcond.not = icmp eq i64 %inc, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+define void @stride7_f64(double* noalias nocapture %dst, i64 %n) {
+; CHECK-LABEL: @stride7_f64(
+; CHECK:      vector.body
+; CHECK:        %[[VEC_IND:.*]] = phi <vscale x 2 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
+; CHECK-NEXT:   %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 2 x i64> %[[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 7, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:   %[[PTRS:.*]] = getelementptr inbounds double, double* %dst, <vscale x 2 x i64> %[[PTR_INDICES]]
+; CHECK-NEXT:   %[[GLOAD:.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> %[[PTRS]],
+; CHECK-NEXT:   %[[VALS:.*]] = fadd <vscale x 2 x double> %[[GLOAD]],
+; CHECK-NEXT:   call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %[[VALS]], <vscale x 2 x double*> %[[PTRS]],
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %mul = mul nuw nsw i64 %i.05, 7
+  %arrayidx = getelementptr inbounds double, double* %dst, i64 %mul
+  %0 = load double, double* %arrayidx, align 8
+  %add = fadd double %0, 1.000000e+00
+  store double %add, double* %arrayidx, align 8
+  %inc = add nuw nsw i64 %i.05, 1
+  %exitcond.not = icmp eq i64 %inc, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !6
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+
+define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) {
+; CHECK-LABEL: @cond_stride7_f64(
+; CHECK:      vector.body
+; CHECK:        %[[MASK:.*]] = icmp ne <vscale x 2 x i64>
+; CHECK:        %[[PTRS:.*]] = getelementptr inbounds double, double* %dst, <vscale x 2 x i64> %{{.*}}
+; CHECK-NEXT:   %[[GLOAD:.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> %[[PTRS]], i32 8, <vscale x 2 x i1> %[[MASK]]
+; CHECK-NEXT:   %[[VALS:.*]] = fadd <vscale x 2 x double> %[[GLOAD]],
+; CHECK-NEXT:   call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %[[VALS]], <vscale x 2 x double*> %[[PTRS]], i32 8, <vscale x 2 x i1> %[[MASK]])
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc
+  %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i64, i64* %cond, i64 %i.07
+  %0 = load i64, i64* %arrayidx, align 8
+  %tobool.not = icmp eq i64 %0, 0
+  br i1 %tobool.not, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body
+  %mul = mul nsw i64 %i.07, 7
+  %arrayidx1 = getelementptr inbounds double, double* %dst, i64 %mul
+  %1 = load double, double* %arrayidx1, align 8
+  %add = fadd double %1, 1.000000e+00
+  store double %add, double* %arrayidx1, align 8
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %inc = add nuw nsw i64 %i.07, 1
+  %exitcond.not = icmp eq i64 %inc, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !6
+
+for.end:                                          ; preds = %for.inc
+  ret void
+}
+
+
+!0 = distinct !{!0, !1, !2, !3, !4, !5}
+!1 = !{!"llvm.loop.mustprogress"}
+!2 = !{!"llvm.loop.vectorize.width", i32 4}
+!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
+!4 = !{!"llvm.loop.interleave.count", i32 1}
+!5 = !{!"llvm.loop.vectorize.enable", i1 true}
+!6 = distinct !{!6, !1, !7, !3, !4, !5}
+!7 = !{!"llvm.loop.vectorize.width", i32 2}
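
A note on the !llvm.loop metadata above: it pins down the vectorization
factor rather than leaving the choice to the cost model. A minimal, generic
form of the same pattern (not taken from the test) looks like:

  br i1 %exitcond, label %exit, label %loop, !llvm.loop !0

  !0 = distinct !{!0, !1, !2}
  !1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
  !2 = !{!"llvm.loop.vectorize.width", i32 4}

In the test itself, !0 requests VF <vscale x 4> for the i32 loop, and !6
requests VF <vscale x 2> for the two double loops (via the width in !7).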
