[PATCH] Implement pragma llvm.vectorizer.enable in LoopVectorizer
Renato Golin
renato.golin at linaro.org
Thu Nov 28 17:42:20 PST 2013
Hi nadav,
This is a simple implementation of #pragma llvm.vectorizer.enable that forces the vectorizer to transform the loop even when the scalar cost is lower than the cost of every vectorized version.
The second side effect of this pragma, turning on the vectorizer for that loop only when the vectorizer is otherwise not enabled (e.g. -O1, -O0, -Oz), is not implemented by this patch.
The front-end part (in Clang) is also not implemented; it will come in a separate patch once both side effects of this pragma are implemented.
See http://llvm.org/PR18086 for more info.
http://llvm-reviews.chandlerc.com/D2289
Files:
lib/Transforms/Vectorize/LoopVectorize.cpp
test/Transforms/LoopVectorize/metadata-enable.ll
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -689,9 +689,10 @@
/// \return The most profitable vectorization factor and the cost of that VF.
/// This method checks every power of two up to VF. If UserVF is not ZERO
/// then this vectorization factor will be selected if vectorization is
- /// possible.
+ /// possible. If Force is true, return the most profitable non-unit factor.
VectorizationFactor selectVectorizationFactor(bool OptForSize,
- unsigned UserVF);
+ unsigned UserVF,
+ bool Force);
/// \return The size (in bits) of the widest type in the code that
/// needs to be vectorized. We ignore values that remain scalar such as
@@ -763,10 +764,13 @@
unsigned Width;
/// Vectorization unroll factor.
unsigned Unroll;
+ /// Vectorization forced enabled
+ bool Force;
LoopVectorizeHints(const Loop *L, bool DisableUnrolling)
: Width(VectorizationFactor)
, Unroll(DisableUnrolling ? 1 : VectorizationUnroll)
+ , Force(false)
, LoopID(L->getLoopID()) {
getHints(L);
// The command line options override any loop metadata except for when
@@ -877,6 +881,11 @@
Unroll = Val;
else
DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n");
+ } else if (Hint == "enable") {
+ if (C->getBitWidth() == 1)
+ Force = Val;
+ else
+ DEBUG(dbgs() << "LV: ignoring invalid enable hint metadata\n");
} else {
DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n');
}
@@ -960,7 +969,7 @@
// Select the optimal vectorization factor.
LoopVectorizationCostModel::VectorizationFactor VF;
- VF = CM.selectVectorizationFactor(OptForSize, Hints.Width);
+ VF = CM.selectVectorizationFactor(OptForSize, Hints.Width, Hints.Force);
// Select the unroll factor.
unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width,
VF.Cost);
@@ -4387,7 +4396,8 @@
LoopVectorizationCostModel::VectorizationFactor
LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
- unsigned UserVF) {
+ unsigned UserVF,
+ bool Force) {
// Width 1 means no vectorize
VectorizationFactor Factor = { 1U, 0U };
if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
@@ -4451,22 +4461,39 @@
return Factor;
}
- float Cost = expectedCost(1);
+ float ScalarCost = expectedCost(1);
unsigned Width = 1;
- DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)Cost << ".\n");
+ DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n");
+ // Calculating only the cost of vectorized loops, so we can compare later
+ // with the cost of the scalar and return, but if the pragma forced
+ // vectorization, we need to return the lowest VF != 1.
+ float Cost = 0.0;
for (unsigned i=2; i <= VF; i*=2) {
// Notice that the vector loop needs to be executed less times, so
// we need to divide the cost of the vector loops by the width of
// the vector elements.
float VectorCost = expectedCost(i) / (float)i;
DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " <<
(int)VectorCost << ".\n");
- if (VectorCost < Cost) {
+ if (Cost == 0.0) {
+ Cost = VectorCost;
+ Width = i;
+ } else if (VectorCost < Cost) {
Cost = VectorCost;
Width = i;
}
}
+ if (!Force) {
+ // Compare again with scalar, since we're not forcing
+ if (ScalarCost < Cost) {
+ Cost = ScalarCost;
+ Width = 1;
+ }
+ } else {
+ DEBUG(dbgs() << "LV: #pragma vecorize enable, ignore scalar costs\n");
+ }
+
DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n");
Factor.Width = Width;
Factor.Cost = Width * Cost;
Index: test/Transforms/LoopVectorize/metadata-enable.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/metadata-enable.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -loop-vectorize -dce -instcombine -S | FileCheck %s
+
+; This file tests the llvm.vectorizer.enable pragma forcing an unprofitable
+; loop to vectorize. Here are some of the costs the vectorizer found:
+; LV: Scalar loop costs: 6.
+; LV: Vector loop of width 2 costs: 42.
+; LV: Vector loop of width 4 costs: 41.
+; LV: Vector loop of width 8 costs: 6.
+; LV: Selecting VF = : 8.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: @test1(
+; CHECK: store <8 x i32>
+; CHECK: ret i32
+
+define i32 @test1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %div = sdiv i32 255, %0
+ %div1 = sdiv i32 %div, %N
+ %arrayidx3 = getelementptr inbounds i32* %a, i64 %indvars.iv
+ store i32 %div1, i32* %arrayidx3, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 32
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end: ; preds = %for.body
+ %1 = load i32* %a, align 4, !tbaa !1
+ ret i32 %1
+}
+
+!0 = metadata !{metadata !0, metadata !1}
+!1 = metadata !{metadata !"llvm.vectorizer.enable", i1 1}
\ No newline at end of file
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2289.1.patch
Type: text/x-patch
Size: 6047 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20131128/f624890d/attachment.bin>
More information about the llvm-commits
mailing list