[polly] r261620 - Annotation of SIMD loops
Roman Gareev via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 23 01:00:14 PST 2016
Author: romangareev
Date: Tue Feb 23 03:00:13 2016
New Revision: 261620
URL: http://llvm.org/viewvc/llvm-project?rev=261620&view=rev
Log:
Annotation of SIMD loops
Use 'mark' nodes to annotate a SIMD loop during ScheduleTransformation and skip
parallelism checks.
The buildbot shows the following compile/execution time changes:
Compile time:
Improvements Δ Previous Current σ
…/gesummv -6.06% 0.2640 0.2480 0.0055
…/gemver -4.46% 0.4480 0.4280 0.0044
…/covariance -4.31% 0.8360 0.8000 0.0065
…/adi -3.23% 0.9920 0.9600 0.0065
…/doitgen -2.53% 0.9480 0.9240 0.0090
…/3mm -2.33% 1.0320 1.0080 0.0087
Execution time:
Regressions Δ Previous Current σ
…/viterbi 1.70% 5.1840 5.2720 0.0074
…/smallpt 1.06% 12.4920 12.6240 0.0040
Reviewed-by: Tobias Grosser <tobias at grosser.es>
Differential Revision: http://reviews.llvm.org/D14491
Modified:
polly/trunk/include/polly/CodeGen/IslNodeBuilder.h
polly/trunk/lib/CodeGen/IslAst.cpp
polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
polly/trunk/lib/Transform/ScheduleOptimizer.cpp
polly/trunk/test/Isl/CodeGen/simple_vec_strides_multidim.ll
polly/trunk/test/ScheduleOptimizer/full_partial_tile_separation.ll
polly/trunk/test/ScheduleOptimizer/prevectorization-without-tiling.ll
polly/trunk/test/ScheduleOptimizer/prevectorization.ll
polly/trunk/test/ScheduleOptimizer/rectangular-tiling.ll
polly/trunk/test/ScopInfo/stride_detection.ll
Modified: polly/trunk/include/polly/CodeGen/IslNodeBuilder.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/IslNodeBuilder.h?rev=261620&r1=261619&r2=261620&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/IslNodeBuilder.h (original)
+++ polly/trunk/include/polly/CodeGen/IslNodeBuilder.h Tue Feb 23 03:00:13 2016
@@ -242,7 +242,7 @@ protected:
bool preloadInvariantEquivClass(const InvariantEquivClassTy &IAClass);
void createForVector(__isl_take isl_ast_node *For, int VectorWidth);
- void createForSequential(__isl_take isl_ast_node *For);
+ void createForSequential(__isl_take isl_ast_node *For, bool KnownParallel);
/// Create LLVM-IR that executes a for node thread parallel.
///
Modified: polly/trunk/lib/CodeGen/IslAst.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslAst.cpp?rev=261620&r1=261619&r2=261620&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslAst.cpp (original)
+++ polly/trunk/lib/CodeGen/IslAst.cpp Tue Feb 23 03:00:13 2016
@@ -255,11 +255,13 @@ astBuildAfterFor(__isl_take isl_ast_node
// tested for parallelism. Test them here to ensure we check all innermost
// loops for parallelism.
if (Payload->IsInnermost && BuildInfo->InParallelFor) {
- if (Payload->IsOutermostParallel)
+ if (Payload->IsOutermostParallel) {
Payload->IsInnermostParallel = true;
- else
- Payload->IsInnermostParallel =
- astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload);
+ } else {
+ if (PollyVectorizerChoice == VECTORIZER_NONE)
+ Payload->IsInnermostParallel =
+ astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload);
+ }
}
if (Payload->IsOutermostParallel)
BuildInfo->InParallelFor = false;
@@ -268,6 +270,31 @@ astBuildAfterFor(__isl_take isl_ast_node
return Node;
}
+static isl_stat astBuildBeforeMark(__isl_keep isl_id *MarkId,
+ __isl_keep isl_ast_build *Build,
+ void *User) {
+ if (!MarkId)
+ return isl_stat_error;
+
+ AstBuildUserInfo *BuildInfo = (AstBuildUserInfo *)User;
+ if (!strcmp(isl_id_get_name(MarkId), "SIMD"))
+ BuildInfo->InParallelFor = true;
+
+ return isl_stat_ok;
+}
+
+static __isl_give isl_ast_node *
+astBuildAfterMark(__isl_take isl_ast_node *Node,
+ __isl_keep isl_ast_build *Build, void *User) {
+ assert(isl_ast_node_get_type(Node) == isl_ast_node_mark);
+ AstBuildUserInfo *BuildInfo = (AstBuildUserInfo *)User;
+ auto *Id = isl_ast_node_mark_get_id(Node);
+ if (!strcmp(isl_id_get_name(Id), "SIMD"))
+ BuildInfo->InParallelFor = false;
+ isl_id_free(Id);
+ return Node;
+}
+
static __isl_give isl_ast_node *AtEachDomain(__isl_take isl_ast_node *Node,
__isl_keep isl_ast_build *Build,
void *User) {
@@ -383,6 +410,12 @@ void IslAst::init(const Dependences &D)
&BuildInfo);
Build =
isl_ast_build_set_after_each_for(Build, &astBuildAfterFor, &BuildInfo);
+
+ Build = isl_ast_build_set_before_each_mark(Build, &astBuildBeforeMark,
+ &BuildInfo);
+
+ Build = isl_ast_build_set_after_each_mark(Build, &astBuildAfterMark,
+ &BuildInfo);
}
buildRunCondition(Build);
Modified: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslNodeBuilder.cpp?rev=261620&r1=261619&r2=261620&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp (original)
+++ polly/trunk/lib/CodeGen/IslNodeBuilder.cpp Tue Feb 23 03:00:13 2016
@@ -352,9 +352,24 @@ void IslNodeBuilder::createUserVector(__
}
void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) {
+ auto *Id = isl_ast_node_mark_get_id(Node);
auto Child = isl_ast_node_mark_get_node(Node);
- create(Child);
isl_ast_node_free(Node);
+ // If a child node of a 'SIMD mark' is a loop that has a single iteration,
+ // it will be optimized away and we should skip it.
+ if (!strcmp(isl_id_get_name(Id), "SIMD") &&
+ isl_ast_node_get_type(Child) == isl_ast_node_for) {
+ bool Vector = PollyVectorizerChoice == VECTORIZER_POLLY;
+ int VectorWidth = getNumberOfIterations(Child);
+ if (Vector && 1 < VectorWidth && VectorWidth <= 16)
+ createForVector(Child, VectorWidth);
+ else
+ createForSequential(Child, true);
+ isl_id_free(Id);
+ return;
+ }
+ create(Child);
+ isl_id_free(Id);
}
void IslNodeBuilder::createForVector(__isl_take isl_ast_node *For,
@@ -417,7 +432,8 @@ void IslNodeBuilder::createForVector(__i
isl_ast_expr_free(Iterator);
}
-void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For) {
+void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For,
+ bool KnownParallel) {
isl_ast_node *Body;
isl_ast_expr *Init, *Inc, *Iterator, *UB;
isl_id *IteratorID;
@@ -428,8 +444,8 @@ void IslNodeBuilder::createForSequential
CmpInst::Predicate Predicate;
bool Parallel;
- Parallel =
- IslAstInfo::isParallel(For) && !IslAstInfo::isReductionParallel(For);
+ Parallel = KnownParallel || (IslAstInfo::isParallel(For) &&
+ !IslAstInfo::isReductionParallel(For));
Body = isl_ast_node_for_get_body(For);
@@ -647,7 +663,7 @@ void IslNodeBuilder::createFor(__isl_tak
createForParallel(For);
return;
}
- createForSequential(For);
+ createForSequential(For, false);
}
void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) {
Modified: polly/trunk/lib/Transform/ScheduleOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/ScheduleOptimizer.cpp?rev=261620&r1=261619&r2=261620&view=diff
==============================================================================
--- polly/trunk/lib/Transform/ScheduleOptimizer.cpp (original)
+++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp Tue Feb 23 03:00:13 2016
@@ -289,6 +289,10 @@ ScheduleTreeOptimizer::prevectSchedBand(
Node, isl_union_set_read_from_str(Ctx, "{ unroll[x]: 1 = 0 }"));
Node = isl_schedule_node_band_sink(Node);
Node = isl_schedule_node_child(Node, 0);
+ if (isl_schedule_node_get_type(Node) == isl_schedule_node_leaf)
+ Node = isl_schedule_node_parent(Node);
+ isl_id *LoopMarker = isl_id_alloc(Ctx, "SIMD", nullptr);
+ Node = isl_schedule_node_insert_mark(Node, LoopMarker);
return Node;
}
Modified: polly/trunk/test/Isl/CodeGen/simple_vec_strides_multidim.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/simple_vec_strides_multidim.ll?rev=261620&r1=261619&r2=261620&view=diff
==============================================================================
--- polly/trunk/test/Isl/CodeGen/simple_vec_strides_multidim.ll (original)
+++ polly/trunk/test/Isl/CodeGen/simple_vec_strides_multidim.ll Tue Feb 23 03:00:13 2016
@@ -1,4 +1,4 @@
-; RUN: opt %loadPolly -polly-codegen -polly-vectorizer=polly -S -dce < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-opt-isl -polly-codegen -polly-vectorizer=polly -polly-prevect-width=8 -S -dce < %s | FileCheck %s
;
; void foo(long n, float A[restrict][n], float B[restrict][n],
; float C[restrict][n], float D[restrict][n]) {
Modified: polly/trunk/test/ScheduleOptimizer/full_partial_tile_separation.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/full_partial_tile_separation.ll?rev=261620&r1=261619&r2=261620&view=diff
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/full_partial_tile_separation.ll (original)
+++ polly/trunk/test/ScheduleOptimizer/full_partial_tile_separation.ll Tue Feb 23 03:00:13 2016
@@ -1,23 +1,26 @@
; RUN: opt -S %loadPolly -polly-vectorizer=stripmine -polly-opt-isl -polly-ast -analyze < %s | FileCheck %s
; CHECK: // 1st level tiling - Tiles
-; CHECK-NEXT: #pragma known-parallel
-; CHECK-NEXT: for (int c0 = 0; c0 <= floord(ni - 1, 32); c0 += 1)
-; CHECK-NEXT: for (int c1 = 0; c1 <= floord(nj - 1, 32); c1 += 1)
-; CHECK-NEXT: for (int c2 = 0; c2 <= floord(nk - 1, 32); c2 += 1) {
-; CHECK-NEXT: // 1st level tiling - Points
-; CHECK-NEXT: for (int c3 = 0; c3 <= min(31, ni - 32 * c0 - 1); c3 += 1) {
-; CHECK-NEXT: for (int c4 = 0; c4 <= min(7, -8 * c1 + nj / 4 - 1); c4 += 1)
-; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1)
-; CHECK-NEXT: #pragma simd
-; CHECK-NEXT: for (int c6 = 0; c6 <= 3; c6 += 1)
-; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, 32 * c1 + 4 * c4 + c6, 32 * c2 + c5);
-; CHECK-NEXT: if (32 * c1 + 31 >= nj)
-; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1)
-; CHECK-NEXT: #pragma simd
-; CHECK-NEXT: for (int c6 = 0; c6 < nj % 4; c6 += 1)
-; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, -(nj % 4) + nj + c6, 32 * c2 + c5);
-; CHECK-NEXT: }
-; CHECK-NEXT: }
+; CHECK-NEXT: #pragma known-parallel
+; CHECK-NEXT: for (int c0 = 0; c0 <= floord(ni - 1, 32); c0 += 1)
+; CHECK-NEXT: for (int c1 = 0; c1 <= floord(nj - 1, 32); c1 += 1)
+; CHECK-NEXT: for (int c2 = 0; c2 <= floord(nk - 1, 32); c2 += 1) {
+; CHECK-NEXT: // 1st level tiling - Points
+; CHECK-NEXT: for (int c3 = 0; c3 <= min(31, ni - 32 * c0 - 1); c3 += 1) {
+; CHECK-NEXT: for (int c4 = 0; c4 <= min(7, -8 * c1 + nj / 4 - 1); c4 += 1)
+; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1) {
+; CHECK-NEXT: // SIMD
+; CHECK-NEXT: for (int c6 = 0; c6 <= 3; c6 += 1)
+; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, 32 * c1 + 4 * c4 + c6, 32 * c2 + c5);
+; CHECK-NEXT: }
+; CHECK-NEXT: if (32 * c1 + 31 >= nj)
+; CHECK-NEXT: #pragma minimal dependence distance: 1
+; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1) {
+; CHECK-NEXT: // SIMD
+; CHECK-NEXT: for (int c6 = 0; c6 < nj % 4; c6 += 1)
+; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, -(nj % 4) + nj + c6, 32 * c2 + c5);
+; CHECK-NEXT: }
+; CHECK-NEXT: }
+; CHECK-NEXT: }
; Function Attrs: nounwind uwtable
define void @kernel_gemm(i32 %ni, i32 %nj, i32 %nk, double %alpha, double %beta, [1024 x double]* %C, [1024 x double]* %A, [1024 x double]* %B) #0 {
Modified: polly/trunk/test/ScheduleOptimizer/prevectorization-without-tiling.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/prevectorization-without-tiling.ll?rev=261620&r1=261619&r2=261620&view=diff
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/prevectorization-without-tiling.ll (original)
+++ polly/trunk/test/ScheduleOptimizer/prevectorization-without-tiling.ll Tue Feb 23 03:00:13 2016
@@ -56,14 +56,14 @@ attributes #0 = { nounwind uwtable "less
; CHECK: #pragma known-parallel
; CHECK: for (int c0 = 0; c0 <= 1535; c0 += 1)
; CHECK: for (int c1 = 0; c1 <= 383; c1 += 1)
-; CHECK: #pragma simd
+; CHECK: // SIMD
; CHECK: for (int c2 = 0; c2 <= 3; c2 += 1)
; CHECK: Stmt_for_body3(c0, 4 * c1 + c2);
; CHECK: #pragma known-parallel
; CHECK: for (int c0 = 0; c0 <= 1535; c0 += 1)
; CHECK: for (int c1 = 0; c1 <= 383; c1 += 1)
; CHECK: for (int c2 = 0; c2 <= 1535; c2 += 1)
-; CHECK: #pragma simd
+; CHECK: // SIMD
; CHECK: for (int c3 = 0; c3 <= 3; c3 += 1)
; CHECK: Stmt_for_body8(c0, 4 * c1 + c3, c2);
Modified: polly/trunk/test/ScheduleOptimizer/prevectorization.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/prevectorization.ll?rev=261620&r1=261619&r2=261620&view=diff
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/prevectorization.ll (original)
+++ polly/trunk/test/ScheduleOptimizer/prevectorization.ll Tue Feb 23 03:00:13 2016
@@ -65,7 +65,7 @@ attributes #0 = { nounwind uwtable "less
; CHECK: for (int c1 = 0; c1 <= 47; c1 += 1)
; CHECK: for (int c2 = 0; c2 <= 31; c2 += 1)
; CHECK: for (int c3 = 0; c3 <= 7; c3 += 1)
-; CHECK: #pragma simd
+; CHECK: // SIMD
; CHECK: for (int c4 = 0; c4 <= 3; c4 += 1)
; CHECK: Stmt_for_body3(32 * c0 + c2, 32 * c1 + 4 * c3 + c4);
; CHECK: #pragma known-parallel
@@ -75,7 +75,7 @@ attributes #0 = { nounwind uwtable "less
; CHECK: for (int c3 = 0; c3 <= 31; c3 += 1)
; CHECK: for (int c4 = 0; c4 <= 7; c4 += 1)
; CHECK: for (int c5 = 0; c5 <= 31; c5 += 1)
-; CHECK: #pragma simd
+; CHECK: // SIMD
; CHECK: for (int c6 = 0; c6 <= 3; c6 += 1)
; CHECK: Stmt_for_body8(32 * c0 + c3, 32 * c1 + 4 * c4 + c6, 32 * c2 + c5);
@@ -85,7 +85,7 @@ attributes #0 = { nounwind uwtable "less
; VEC16: for (int c1 = 0; c1 <= 47; c1 += 1)
; VEC16: for (int c2 = 0; c2 <= 31; c2 += 1)
; VEC16: for (int c3 = 0; c3 <= 1; c3 += 1)
-; VEC16: #pragma simd
+; VEC16: // SIMD
; VEC16: for (int c4 = 0; c4 <= 15; c4 += 1)
; VEC16: Stmt_for_body3(32 * c0 + c2, 32 * c1 + 16 * c3 + c4);
; VEC16: #pragma known-parallel
@@ -95,7 +95,7 @@ attributes #0 = { nounwind uwtable "less
; VEC16: for (int c3 = 0; c3 <= 31; c3 += 1)
; VEC16: for (int c4 = 0; c4 <= 1; c4 += 1)
; VEC16: for (int c5 = 0; c5 <= 31; c5 += 1)
-; VEC16: #pragma simd
+; VEC16: // SIMD
; VEC16: for (int c6 = 0; c6 <= 15; c6 += 1)
; VEC16: Stmt_for_body8(32 * c0 + c3, 32 * c1 + 16 * c4 + c6, 32 * c2 + c5);
; VEC16: }
Modified: polly/trunk/test/ScheduleOptimizer/rectangular-tiling.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/rectangular-tiling.ll?rev=261620&r1=261619&r2=261620&view=diff
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/rectangular-tiling.ll (original)
+++ polly/trunk/test/ScheduleOptimizer/rectangular-tiling.ll Tue Feb 23 03:00:13 2016
@@ -74,10 +74,10 @@
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c3 = 0; c3 <= 1; c3 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c4 = 0; c4 <= 7; c4 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c5 = 0; c5 <= 1; c5 += 1) {
-; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma simd
+; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: // SIMD
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c8 = 0; c8 <= 3; c8 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 4 * c5 + c8);
-; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma simd
+; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: // SIMD
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c8 = 0; c8 <= 3; c8 += 1)
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 4 * c5 + c8);
; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: }
Modified: polly/trunk/test/ScopInfo/stride_detection.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/stride_detection.ll?rev=261620&r1=261619&r2=261620&view=diff
==============================================================================
--- polly/trunk/test/ScopInfo/stride_detection.ll (original)
+++ polly/trunk/test/ScopInfo/stride_detection.ll Tue Feb 23 03:00:13 2016
@@ -1,4 +1,4 @@
-; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S -polly-vectorizer=polly -polly-codegen < %s -S | FileCheck %s
+; RUN: opt %loadPolly -polly-opt-isl -polly-vectorizer=polly -polly-codegen < %s -S | FileCheck %s
; #pragma known-parallel
; for (int c0 = 0; c0 <= 31; c0 += 1)
More information about the llvm-commits
mailing list