[llvm] r357156 - [VPlan] Determine Vector Width programmatically.
Author: fhahn
Date: Thu Mar 28 03:37:12 2019
New Revision: 357156
URL: http://llvm.org/viewvc/llvm-project?rev=357156&view=rev
Log:
[VPlan] Determine Vector Width programmatically.
With this change, the VPlan native path is triggered with the directive:
#pragma clang loop vectorize(enable)
There is no need to specify the vectorize_width(N) clause.
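For example, the following loop nest (the same C source reproduced in the tests added below) now reaches the VPlan-native path with the pragma alone, and the vectorization width is derived from the target's vector registers:

  extern int arr[8][8];
  extern int arr2[8];

  void foo(int n) {
  #pragma clang loop vectorize(enable) // no vectorize_width(N) required
    for (int i1 = 0; i1 < 8; i1++) {
      arr2[i1] = i1;
      for (int i2 = 0; i2 < 8; i2++)
        arr[i2][i1] = i1 + n;
    }
  }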
Patch by Francesco Petrogalli <francesco.petrogalli@arm.com>
Differential Revision: https://reviews.llvm.org/D57598
Added:
llvm/trunk/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
llvm/trunk/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll
Modified:
llvm/trunk/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/trunk/test/Transforms/LoopVectorize/explicit_outer_detection.ll
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorizationPlanner.h?rev=357156&r1=357155&r2=357156&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorizationPlanner.h (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorizationPlanner.h Thu Mar 28 03:37:12 2019
@@ -174,6 +174,10 @@ struct VectorizationFactor {
// Width 1 means no vectorization, cost 0 means uncomputed cost.
static VectorizationFactor Disabled() { return {1, 0}; }
+
+ bool operator==(const VectorizationFactor &rhs) const {
+ return Width == rhs.Width && Cost == rhs.Cost;
+ }
};
/// Planner drives the vectorization process after having passed
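The equality operator exists so callers can compare a planner result against the Disabled() sentinel; the LoopVectorize.cpp hunk below uses exactly that to skip code generation when no vector code will be produced. A minimal, self-contained sketch of the pattern (simplified stand-ins, not the real LLVM declarations):

  #include <cassert>

  // Minimal stand-in for the struct in LoopVectorizationPlanner.h.
  struct VectorizationFactor {
    unsigned Width; // Width 1 means no vectorization.
    unsigned Cost;  // Cost 0 means uncomputed cost.
    static VectorizationFactor Disabled() { return {1, 0}; }
    bool operator==(const VectorizationFactor &rhs) const {
      return Width == rhs.Width && Cost == rhs.Cost;
    }
  };

  // Mirrors the new bail-out condition in processLoopInVPlanNativePath:
  // do not generate vector code when the planner reports Disabled().
  bool shouldBailOut(const VectorizationFactor &VF, bool StressTest) {
    return StressTest || VectorizationFactor::Disabled() == VF;
  }

  int main() {
    assert(shouldBailOut(VectorizationFactor::Disabled(), false));
    assert(!shouldBailOut({4, 0}, false)); // VF 4, uncomputed cost: proceed
    return 0;
  }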
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=357156&r1=357155&r2=357156&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Thu Mar 28 03:37:12 2019
@@ -1383,12 +1383,6 @@ static bool isExplicitVecOuterLoop(Loop
return false;
}
- if (!Hints.getWidth()) {
- LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No user vector width.\n");
- Hints.emitRemarkWithHints();
- return false;
- }
-
if (Hints.getInterleave() > 1) {
// TODO: Interleave support is future work.
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Interleave is not supported for "
@@ -6081,31 +6075,48 @@ void LoopVectorizationCostModel::collect
}
}
+// TODO: We could return a pair of values specifying the min and max VF,
+// to be used in `buildVPlans(MinVF, MaxVF)` instead of
+// `buildVPlans(VF, VF)`. We cannot do that yet because VPlan does not
+// have a cost model that can choose which plan to execute when more
+// than one is generated.
+unsigned determineVPlanVF(const unsigned WidestVectorRegBits,
+ LoopVectorizationCostModel &CM) {
+ unsigned WidestType;
+ std::tie(std::ignore, WidestType) = CM.getSmallestAndWidestTypes();
+ return WidestVectorRegBits / WidestType;
+}
+
VectorizationFactor
LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize,
unsigned UserVF) {
+ unsigned VF = UserVF;
// Outer loop handling: They may require CFG and instruction level
// transformations before even evaluating whether vectorization is profitable.
// Since we cannot modify the incoming IR, we need to build VPlan upfront in
// the vectorization pipeline.
if (!OrigLoop->empty()) {
- // TODO: If UserVF is not provided, we set UserVF to 4 for stress testing.
- // This won't be necessary when UserVF is not required in the VPlan-native
- // path.
- if (VPlanBuildStressTest && !UserVF)
- UserVF = 4;
+ // If the user doesn't provide a vectorization factor, determine a
+ // reasonable one.
+ if (!UserVF) {
+ // We set VF to 4 for stress testing.
+ if (VPlanBuildStressTest)
+ VF = 4;
+ else
+ VF = determineVPlanVF(TTI->getRegisterBitWidth(true /* Vector */), CM);
+ }
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
- assert(UserVF && "Expected UserVF for outer loop vectorization.");
- assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
- LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
- buildVPlans(UserVF, UserVF);
+ assert(isPowerOf2_32(VF) && "VF needs to be a power of two");
+ LLVM_DEBUG(dbgs() << "LV: Using " << (UserVF ? "user VF " : "computed VF ")
+ << VF << " to build VPlans.\n");
+ buildVPlans(VF, VF);
// For VPlan build stress testing, we bail out after VPlan construction.
if (VPlanBuildStressTest)
return VectorizationFactor::Disabled();
- return {UserVF, 0};
+ return {VF, 0};
}
LLVM_DEBUG(
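To make the arithmetic concrete: determineVPlanVF divides the widest vector register width by the widest scalar type used in the loop. A standalone sketch with plain integers standing in for the TTI and cost-model queries (the bit widths are assumptions matching the tests added below, where the widest type is i32):

  #include <cstdio>

  // Stand-in for determineVPlanVF: widest vector register / widest type.
  unsigned determineVPlanVF(unsigned WidestVectorRegBits,
                            unsigned WidestTypeBits) {
    return WidestVectorRegBits / WidestTypeBits;
  }

  // Stand-in for the VF selection in planInVPlanNativePath.
  unsigned chooseVF(unsigned UserVF, bool StressTest) {
    if (UserVF)
      return UserVF;                  // explicit vectorize_width(N)
    if (StressTest)
      return 4;                       // fixed VF for VPlan stress testing
    return determineVPlanVF(128, 32); // e.g. NEON/SSE: 128 / 32 == 4
  }

  int main() {
    printf("%u\n", chooseVF(0, false));        // 4 (computed VF)
    printf("%u\n", determineVPlanVF(256, 32)); // 8 (AVX, matching the
                                               //    v8i32 checks below)
    return 0;
  }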
@@ -7128,7 +7139,7 @@ static bool processLoopInVPlanNativePath
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM);
// Get user vectorization factor.
- unsigned UserVF = Hints.getWidth();
+ const unsigned UserVF = Hints.getWidth();
// Check the function attributes to find out if this function should be
// optimized for size.
@@ -7136,16 +7147,18 @@ static bool processLoopInVPlanNativePath
Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
// Plan how to best vectorize, return the best VF and its cost.
- VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
+ const VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
// If we are stress testing VPlan builds, do not attempt to generate vector
// code. Masked vector code generation support will follow soon.
- if (VPlanBuildStressTest || EnableVPlanPredication)
+ // Also, do not attempt to vectorize if no vector code will be produced.
+ if (VPlanBuildStressTest || EnableVPlanPredication ||
+ VectorizationFactor::Disabled() == VF)
return false;
LVP.setBestPlan(VF.Width, 1);
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, UserVF, 1, LVL,
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL,
&CM);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
<< L->getHeader()->getParent()->getName() << "\"\n");
Added: llvm/trunk/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll?rev=357156&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll Thu Mar 28 03:37:12 2019
@@ -0,0 +1,83 @@
+; RUN: opt -S -loop-vectorize -enable-vplan-native-path -mtriple aarch64-gnu-linux < %s | FileCheck %s
+
+; extern int arr[8][8];
+; extern int arr2[8];
+;
+; void foo(int n)
+; {
+; int i1, i2;
+;
+; #pragma clang loop vectorize(enable)
+; for (i1 = 0; i1 < 8; i1++) {
+; arr2[i1] = i1;
+; for (i2 = 0; i2 < 8; i2++)
+; arr[i2][i1] = i1 + n;
+; }
+; }
+;
+
+; CHECK-LABEL: vector.ph:
+; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> undef, i32 %n, i32 0
+; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> undef, <4 x i32> zeroinitializer
+
+; CHECK-LABEL: vector.body:
+; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
+; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
+; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, <4 x i64> %[[VecInd]]
+; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
+; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[VecIndTr]], <4 x i32*> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
+; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]]
+; CHECK: br label %[[InnerLoop:.+]]
+
+; CHECK: [[InnerLoop]]:
+; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ], [ zeroinitializer, %vector.body ]
+; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]]
+; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[StoreVal]], <4 x i32*> %[[AAddr2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true
+; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], <i64 8, i64 8, i64 8, i64 8>
+; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0
+; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]]
+
+; CHECK: [[ForInc]]:
+; CHECK: %[[IndNext]] = add i64 %[[Ind]], 4
+; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8
+; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body
+
+@arr2 = external global [8 x i32], align 16
+@arr = external global [8 x [8 x i32]], align 16
+
+; Function Attrs: norecurse nounwind uwtable
+define void @foo(i32 %n) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc8, %entry
+ %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ]
+ %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, i64 %indvars.iv21
+ %0 = trunc i64 %indvars.iv21 to i32
+ store i32 %0, i32* %arrayidx, align 4
+ %1 = trunc i64 %indvars.iv21 to i32
+ %add = add nsw i32 %1, %n
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
+ %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21
+ store i32 %add, i32* %arrayidx7, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 8
+ br i1 %exitcond, label %for.inc8, label %for.body3
+
+for.inc8: ; preds = %for.body3
+ %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
+ %exitcond23 = icmp eq i64 %indvars.iv.next22, 8
+ br i1 %exitcond23, label %for.end10, label %for.body, !llvm.loop !1
+
+for.end10: ; preds = %for.inc8
+ ret void
+}
+
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}
Added: llvm/trunk/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll?rev=357156&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll Thu Mar 28 03:37:12 2019
@@ -0,0 +1,114 @@
+; RUN: opt -S -loop-vectorize -enable-vplan-native-path -mtriple x86_64 < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -enable-vplan-native-path -mtriple x86_64 -mattr=+avx < %s | FileCheck %s --check-prefix=AVX
+; RUN: opt -S -loop-vectorize -enable-vplan-native-path -mtriple x86_64 -mattr=+avx2 < %s | FileCheck %s --check-prefix=AVX
+
+; extern int arr[8][8];
+; extern int arr2[8];
+;
+; void foo(int n)
+; {
+; int i1, i2;
+;
+; #pragma clang loop vectorize(enable)
+; for (i1 = 0; i1 < 8; i1++) {
+; arr2[i1] = i1;
+; for (i2 = 0; i2 < 8; i2++)
+; arr[i2][i1] = i1 + n;
+; }
+; }
+;
+
+; CHECK-LABEL: vector.ph:
+; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> undef, i32 %n, i32 0
+; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> undef, <4 x i32> zeroinitializer
+
+; CHECK-LABEL: vector.body:
+; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
+; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
+; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, <4 x i64> %[[VecInd]]
+; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
+; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[VecIndTr]], <4 x i32*> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
+; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]]
+; CHECK: br label %[[InnerLoop:.+]]
+
+; CHECK: [[InnerLoop]]:
+; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ], [ zeroinitializer, %vector.body ]
+; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]]
+; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[StoreVal]], <4 x i32*> %[[AAddr2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true
+; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], <i64 8, i64 8, i64 8, i64 8>
+; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0
+; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]]
+
+; CHECK: [[ForInc]]:
+; CHECK: %[[IndNext]] = add i64 %[[Ind]], 4
+; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8
+; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body
+
+; AVX-LABEL: vector.ph:
+; AVX: %[[SplatVal:.*]] = insertelement <8 x i32> undef, i32 %n, i32 0
+; AVX: %[[Splat:.*]] = shufflevector <8 x i32> %[[SplatVal]], <8 x i32> undef, <8 x i32> zeroinitializer
+
+; AVX-LABEL: vector.body:
+; AVX: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
+; AVX: %[[VecInd:.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
+; AVX: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, <8 x i64> %[[VecInd]]
+; AVX: %[[VecIndTr:.*]] = trunc <8 x i64> %[[VecInd]] to <8 x i32>
+; AVX: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %[[VecIndTr]], <8 x i32*> %[[AAddr]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; AVX: %[[VecIndTr2:.*]] = trunc <8 x i64> %[[VecInd]] to <8 x i32>
+; AVX: %[[StoreVal:.*]] = add nsw <8 x i32> %[[VecIndTr2]], %[[Splat]]
+; AVX: br label %[[InnerLoop:.+]]
+
+; AVX: [[InnerLoop]]:
+; AVX: %[[InnerPhi:.*]] = phi <8 x i64> [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ], [ zeroinitializer, %vector.body ]
+; AVX: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, <8 x i64> %[[InnerPhi]], <8 x i64> %[[VecInd]]
+; AVX: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %[[StoreVal]], <8 x i32*> %[[AAddr2]], i32 4, <8 x i1> <i1 true, i1 true, i1 true
+; AVX: %[[InnerPhiNext]] = add nuw nsw <8 x i64> %[[InnerPhi]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+; AVX: %[[VecCond:.*]] = icmp eq <8 x i64> %[[InnerPhiNext]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; AVX: %[[InnerCond:.*]] = extractelement <8 x i1> %[[VecCond]], i32 0
+; AVX: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]]
+
+; AVX: [[ForInc]]:
+; AVX: %[[IndNext]] = add i64 %[[Ind]], 8
+; AVX: %[[VecIndNext]] = add <8 x i64> %[[VecInd]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; AVX: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8
+; AVX: br i1 %[[Cmp]], label %middle.block, label %vector.body
+
+@arr2 = external global [8 x i32], align 16
+@arr = external global [8 x [8 x i32]], align 16
+
+; Function Attrs: norecurse nounwind uwtable
+define void @foo(i32 %n) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc8, %entry
+ %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ]
+ %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, i64 %indvars.iv21
+ %0 = trunc i64 %indvars.iv21 to i32
+ store i32 %0, i32* %arrayidx, align 4
+ %1 = trunc i64 %indvars.iv21 to i32
+ %add = add nsw i32 %1, %n
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
+ %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21
+ store i32 %add, i32* %arrayidx7, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 8
+ br i1 %exitcond, label %for.inc8, label %for.body3
+
+for.inc8: ; preds = %for.body3
+ %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
+ %exitcond23 = icmp eq i64 %indvars.iv.next22, 8
+ br i1 %exitcond23, label %for.end10, label %for.body, !llvm.loop !1
+
+for.end10: ; preds = %for.inc8
+ ret void
+}
+
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}
Modified: llvm/trunk/test/Transforms/LoopVectorize/explicit_outer_detection.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/explicit_outer_detection.ll?rev=357156&r1=357155&r2=357156&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/explicit_outer_detection.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/explicit_outer_detection.ll Thu Mar 28 03:37:12 2019
@@ -21,7 +21,7 @@
; CHECK-LABEL: vector_width
; CHECK: LV: Loop hints: force=enabled width=4 unroll=0
; CHECK: LV: We can vectorize this outer loop!
-; CHECK: LV: Using user VF 4.
+; CHECK: LV: Using user VF 4 to build VPlans.
; CHECK-NOT: LV: Loop hints: force=?
; CHECK-NOT: LV: Found a loop: inner.body
@@ -68,14 +68,12 @@ for.end15:
ret void
}
-; Case 2: Annotated outer loop WITHOUT vector width information doesn't have to
-; be collected.
+; Case 2: Annotated outer loop WITHOUT vector width information must be collected.
; CHECK-LABEL: case2
-; CHECK-NOT: LV: Loop hints: force=enabled
-; CHECK-NOT: LV: We can vectorize this outer loop!
-; CHECK: LV: Loop hints: force=?
-; CHECK: LV: Found a loop: inner.body
+; CHECK: LV: Loop hints: force=enabled width=0 unroll=0
+; CHECK: LV: We can vectorize this outer loop!
+; CHECK: LV: Using computed VF 1 to build VPlans.
define void @case2(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry: