[polly] r245424 - Make prevectorization width configurable
Tobias Grosser via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 19 01:46:12 PDT 2015
Author: grosser
Date: Wed Aug 19 03:46:11 2015
New Revision: 245424
URL: http://llvm.org/viewvc/llvm-project?rev=245424&view=rev
Log:
Make prevectorization width configurable
Polly uses 'prevectorization' to enable outer loop vectorization. When
vectorizing an outer loop, we strip-mine <number-of-prevec-dims> loop
iterations which are then interchanged to the innermost level such that LLVM's
inner loop vectorizer (or Polly's simple vectorizer) can easily vectorize this
loop. The number of loop iterations to strip-mine is now configurable with the
option -polly-prevect-width=<number-of-prevec-dims>.
This is mostly a debugging option. We should probably add a heuristic that
derives the number of prevectorization dimensions from the target data and
the data types used.
Modified:
polly/trunk/lib/Transform/ScheduleOptimizer.cpp
polly/trunk/test/ScheduleOptimizer/prevectorization.ll
Modified: polly/trunk/lib/Transform/ScheduleOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/ScheduleOptimizer.cpp?rev=245424&r1=245423&r2=245424&view=diff
==============================================================================
--- polly/trunk/lib/Transform/ScheduleOptimizer.cpp (original)
+++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp Wed Aug 19 03:46:11 2015
@@ -107,6 +107,12 @@ static cl::opt<std::string>
cl::desc("Maximize the band depth (yes/no)"), cl::Hidden,
cl::init("yes"), cl::ZeroOrMore, cl::cat(PollyCategory));
+static cl::opt<int> PrevectorWidth(
+ "polly-prevect-width",
+ cl::desc(
+ "The number of loop iterations to strip-mine for pre-vectorization"),
+ cl::Hidden, cl::init(4), cl::ZeroOrMore, cl::cat(PollyCategory));
+
static cl::opt<int> DefaultTileSize(
"polly-default-tile-size",
cl::desc("The default tile size (if not enough were provided by"
@@ -176,7 +182,7 @@ private:
/// reason about parallelism.
static __isl_give isl_schedule_node *
prevectSchedBand(__isl_take isl_schedule_node *Node, unsigned DimToVectorize,
- int VectorWidth = 4);
+ int VectorWidth);
/// @brief Apply additional optimizations on the bands in the schedule tree.
///
@@ -298,7 +304,7 @@ isl_schedule_node *IslScheduleOptimizer:
for (int i = Dims - 1; i >= 0; i--)
if (isl_schedule_node_band_member_get_coincident(Node, i)) {
- Node = IslScheduleOptimizer::prevectSchedBand(Node, i);
+ Node = IslScheduleOptimizer::prevectSchedBand(Node, i, PrevectorWidth);
break;
}
Modified: polly/trunk/test/ScheduleOptimizer/prevectorization.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/prevectorization.ll?rev=245424&r1=245423&r2=245424&view=diff
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/prevectorization.ll (original)
+++ polly/trunk/test/ScheduleOptimizer/prevectorization.ll Wed Aug 19 03:46:11 2015
@@ -1,5 +1,11 @@
; RUN: opt -S %loadPolly -polly-detect-unprofitable -basicaa -polly-opt-isl -polly-vectorizer=polly -polly-ast -analyze < %s | FileCheck %s
; RUN: opt -S %loadPolly -polly-detect-unprofitable -basicaa -polly-opt-isl -polly-vectorizer=stripmine -polly-ast -analyze < %s | FileCheck %s
+
+; RUN: opt -S %loadPolly -polly-detect-unprofitable -basicaa -polly-opt-isl \
+; RUN: -polly-vectorizer=polly -polly-ast -analyze \
+; RUN: -polly-prevect-width=16 < %s | \
+; RUN: FileCheck %s -check-prefix=VEC16
+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
@@ -73,6 +79,28 @@ attributes #0 = { nounwind uwtable "less
; CHECK: for (int c6 = 0; c6 <= 3; c6 += 1)
; CHECK: Stmt_for_body8(32 * c0 + c3, 32 * c1 + 4 * c4 + c6, 32 * c2 + c5);
+; VEC16: {
+; VEC16: #pragma known-parallel
+; VEC16: for (int c0 = 0; c0 <= 47; c0 += 1)
+; VEC16: for (int c1 = 0; c1 <= 47; c1 += 1)
+; VEC16: for (int c2 = 0; c2 <= 31; c2 += 1)
+; VEC16: for (int c3 = 0; c3 <= 1; c3 += 1)
+; VEC16: #pragma simd
+; VEC16: for (int c4 = 0; c4 <= 15; c4 += 1)
+; VEC16: Stmt_for_body3(32 * c0 + c2, 32 * c1 + 16 * c3 + c4);
+; VEC16: #pragma known-parallel
+; VEC16: for (int c0 = 0; c0 <= 47; c0 += 1)
+; VEC16: for (int c1 = 0; c1 <= 47; c1 += 1)
+; VEC16: for (int c2 = 0; c2 <= 47; c2 += 1)
+; VEC16: for (int c3 = 0; c3 <= 31; c3 += 1)
+; VEC16: for (int c4 = 0; c4 <= 1; c4 += 1)
+; VEC16: for (int c5 = 0; c5 <= 31; c5 += 1)
+; VEC16: #pragma simd
+; VEC16: for (int c6 = 0; c6 <= 15; c6 += 1)
+; VEC16: Stmt_for_body8(32 * c0 + c3, 32 * c1 + 16 * c4 + c6, 32 * c2 + c5);
+; VEC16: }
+
+
!llvm.ident = !{!0}
!0 = !{!"clang version 3.5.0 "}
More information about the llvm-commits
mailing list