[llvm] r358056 - [VPLAN] Minor improvement to testing and debug messages.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 10 01:17:28 PDT 2019


Author: fhahn
Date: Wed Apr 10 01:17:28 2019
New Revision: 358056

URL: http://llvm.org/viewvc/llvm-project?rev=358056&view=rev
Log:
[VPLAN] Minor improvement to testing and debug messages.

1. Use computed VF for stress testing.
2. If the computed VF does not produce vector code (VF smaller than 2), force VF to be 4.
3. Test vectorization of i64 data on AArch64 to make sure we generate VF != 4 (on X86 that was already tested on AVX).

Patch by Francesco Petrogalli <francesco.petrogalli at arm.com>

Differential Revision: https://reviews.llvm.org/D59952

Added:
    llvm/trunk/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/trunk/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
    llvm/trunk/test/Transforms/LoopVectorize/explicit_outer_detection.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=358056&r1=358055&r2=358056&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Wed Apr 10 01:17:28 2019
@@ -6117,17 +6117,20 @@ LoopVectorizationPlanner::planInVPlanNat
     // If the user doesn't provide a vectorization factor, determine a
     // reasonable one.
     if (!UserVF) {
-      // We set VF to 4 for stress testing.
-      if (VPlanBuildStressTest)
+      VF = determineVPlanVF(TTI->getRegisterBitWidth(true /* Vector*/), CM);
+      LLVM_DEBUG(dbgs() << "LV: VPlan computed VF " << VF << ".\n");
+
+      // Make sure we have a VF > 1 for stress testing.
+      if (VPlanBuildStressTest && VF < 2) {
+        LLVM_DEBUG(dbgs() << "LV: VPlan stress testing: "
+                          << "overriding computed VF.\n");
         VF = 4;
-      else
-        VF = determineVPlanVF(TTI->getRegisterBitWidth(true /* Vector*/), CM);
+      }
     }
-
     assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
     assert(isPowerOf2_32(VF) && "VF needs to be a power of two");
-    LLVM_DEBUG(dbgs() << "LV: Using " << (UserVF ? "user VF " : "computed VF ")
-                      << VF << " to build VPlans.\n");
+    LLVM_DEBUG(dbgs() << "LV: Using " << (UserVF ? "user " : "") << "VF " << VF
+                      << " to build VPlans.\n");
     buildVPlans(VF, VF);
 
     // For VPlan build stress testing, we bail out after VPlan construction.

Modified: llvm/trunk/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll?rev=358056&r1=358055&r2=358056&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll Wed Apr 10 01:17:28 2019
@@ -16,6 +16,7 @@
 ; }
 ;
 
+; CHECK-LABEL: @foo_i32(
 ; CHECK-LABEL: vector.ph:
 ; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> undef, i32 %n, i32 0
 ; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> undef, <4 x i32> zeroinitializer
@@ -48,8 +49,11 @@
 @arr2 = external global [8 x i32], align 16
 @arr = external global [8 x [8 x i32]], align 16
 
+ at arrX = external global [8 x i64], align 16
+ at arrY = external global [8 x [8 x i64]], align 16
+
 ; Function Attrs: norecurse nounwind uwtable
-define void @foo(i32 %n) {
+define void @foo_i32(i32 %n) {
 entry:
   br label %for.body
 
@@ -79,5 +83,62 @@ for.end10:
   ret void
 }
 
+; CHECK-LABEL: @foo_i64(
+; CHECK-LABEL: vector.ph:
+; CHECK: %[[SplatVal:.*]] = insertelement <2 x i64> undef, i64 %n, i32 0
+; CHECK: %[[Splat:.*]] = shufflevector <2 x i64> %[[SplatVal]], <2 x i64> undef, <2 x i32> zeroinitializer
+
+; CHECK-LABEL: vector.body:
+; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
+; CHECK: %[[VecInd:.*]] = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
+; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i64], [8 x i64]* @arrX, i64 0, <2 x i64> %[[VecInd]]
+; CHECK: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> %[[VecInd]], <2 x i64*> %[[AAddr]], i32 4, <2 x i1> <i1 true, i1 true>)
+; CHECK: %[[StoreVal:.*]] = add nsw <2 x i64> %[[VecInd]], %[[Splat]]
+; CHECK: br label %[[InnerLoop:.+]]
+
+; CHECK: [[InnerLoop]]:
+; CHECK: %[[InnerPhi:.*]] = phi <2 x i64> [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ], [ zeroinitializer, %vector.body ]
+; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* @arrY, i64 0, <2 x i64> %[[InnerPhi]], <2 x i64> %[[VecInd]]
+; CHECK: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> %[[StoreVal]], <2 x i64*> %[[AAddr2]], i32 4, <2 x i1> <i1 true, i1 true>
+; CHECK: %[[InnerPhiNext]] = add nuw nsw <2 x i64> %[[InnerPhi]], <i64 1, i64 1>
+; CHECK: %[[VecCond:.*]] = icmp eq <2 x i64> %[[InnerPhiNext]], <i64 8, i64 8>
+; CHECK: %[[InnerCond:.*]] = extractelement <2 x i1> %[[VecCond]], i32 0
+; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]]
+
+; CHECK: [[ForInc]]:
+; CHECK: %[[IndNext]] = add i64 %[[Ind]], 2
+; CHECK: %[[VecIndNext]] = add <2 x i64> %[[VecInd]], <i64 2, i64 2>
+; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8
+; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body
+; Function Attrs: norecurse nounwind uwtable
+define void @foo_i64(i64 %n) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc8, %entry
+  %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ]
+  %arrayidx = getelementptr inbounds [8 x i64], [8 x i64]* @arrX, i64 0, i64 %indvars.iv21
+  store i64 %indvars.iv21, i64* %arrayidx, align 4
+  %add = add nsw i64 %indvars.iv21, %n
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
+  %arrayidx7 = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* @arrY, i64 0, i64 %indvars.iv, i64 %indvars.iv21
+  store i64 %add, i64* %arrayidx7, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 8
+  br i1 %exitcond, label %for.inc8, label %for.body3
+
+for.inc8:                                         ; preds = %for.body3
+  %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
+  %exitcond23 = icmp eq i64 %indvars.iv.next22, 8
+  br i1 %exitcond23, label %for.end10, label %for.body, !llvm.loop !1
+
+for.end10:                                        ; preds = %for.inc8
+  ret void
+}
+
+
 !1 = distinct !{!1, !2}
 !2 = !{!"llvm.loop.vectorize.enable", i1 true}

Modified: llvm/trunk/test/Transforms/LoopVectorize/explicit_outer_detection.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/explicit_outer_detection.ll?rev=358056&r1=358055&r2=358056&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/explicit_outer_detection.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/explicit_outer_detection.ll Wed Apr 10 01:17:28 2019
@@ -73,7 +73,7 @@ for.end15:
 ; CHECK-LABEL: case2
 ; CHECK: LV: Loop hints: force=enabled width=0 unroll=0
 ; CHECK: LV: We can vectorize this outer loop!
-; CHECK: LV: Using computed VF 1 to build VPlans.
+; CHECK: LV: Using VF 1 to build VPlans.
 
 define void @case2(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
 entry:

Added: llvm/trunk/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll?rev=358056&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll Wed Apr 10 01:17:28 2019
@@ -0,0 +1,44 @@
+; RUN: opt < %s  -S -loop-vectorize -enable-vplan-native-path -vplan-build-stress-test -debug-only=loop-vectorize -disable-output 2>&1  | FileCheck %s
+
+; This test checks that, when stress testing VPlan, if the computed VF
+; is 1, we override it to VF = 4.
+
+; CHECK: LV: VPlan computed VF 1.
+; CHECK: LV: VPlan stress testing: overriding computed VF.
+; CHECK: LV: Using VF 4 to build VPlans.
+ at arr2 = external global [8 x i32], align 16
+ at arr = external global [8 x [8 x i32]], align 16
+
+; Function Attrs: norecurse nounwind uwtable
+define void @foo(i32 %n) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc8, %entry
+  %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ]
+  %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, i64 %indvars.iv21
+  %0 = trunc i64 %indvars.iv21 to i32
+  store i32 %0, i32* %arrayidx, align 4
+  %1 = trunc i64 %indvars.iv21 to i32
+  %add = add nsw i32 %1, %n
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
+  %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21
+  store i32 %add, i32* %arrayidx7, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 8
+  br i1 %exitcond, label %for.inc8, label %for.body3
+
+for.inc8:                                         ; preds = %for.body3
+  %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
+  %exitcond23 = icmp eq i64 %indvars.iv.next22, 8
+  br i1 %exitcond23, label %for.end10, label %for.body, !llvm.loop !1
+
+for.end10:                                        ; preds = %for.inc8
+  ret void
+}
+
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}




More information about the llvm-commits mailing list