[polly] r245599 - Do really not unroll the vector loop in combination with register tiling

Tobias Grosser via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 20 12:08:16 PDT 2015


Author: grosser
Date: Thu Aug 20 14:08:16 2015
New Revision: 245599

URL: http://llvm.org/viewvc/llvm-project?rev=245599&view=rev
Log:
Do really not unroll the vector loop in combination with register tiling

The previous commit lacked a test case for register tiling combined with
pre-vectorization, and as a result it got exactly that combination wrong.

Modified:
    polly/trunk/lib/Transform/ScheduleOptimizer.cpp
    polly/trunk/test/ScheduleOptimizer/rectangular-tiling.ll

Modified: polly/trunk/lib/Transform/ScheduleOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/ScheduleOptimizer.cpp?rev=245599&r1=245598&r2=245599&view=diff
==============================================================================
--- polly/trunk/lib/Transform/ScheduleOptimizer.cpp (original)
+++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp Thu Aug 20 14:08:16 2015
@@ -306,12 +306,11 @@ IslScheduleOptimizer::prevectSchedBand(_
       isl_multi_val_set_val(Sizes, 0, isl_val_int_from_si(Ctx, VectorWidth));
   Node = isl_schedule_node_band_tile(Node, Sizes);
   Node = isl_schedule_node_child(Node, 0);
-  Node = isl_schedule_node_band_sink(Node);
-
   // Make sure the "trivially vectorizable loop" is not unrolled. Otherwise,
   // we will have troubles to match it in the backend.
   Node = isl_schedule_node_band_set_ast_build_options(
-      Node, isl_union_set_read_from_str(Ctx, "{unroll[x]: 1 = 0}"));
+      Node, isl_union_set_read_from_str(Ctx, "{ unroll[x]: 1 = 0 }"));
+  Node = isl_schedule_node_band_sink(Node);
   Node = isl_schedule_node_child(Node, 0);
   return Node;
 }

Modified: polly/trunk/test/ScheduleOptimizer/rectangular-tiling.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/rectangular-tiling.ll?rev=245599&r1=245598&r2=245599&view=diff
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/rectangular-tiling.ll (original)
+++ polly/trunk/test/ScheduleOptimizer/rectangular-tiling.ll Thu Aug 20 14:08:16 2015
@@ -14,6 +14,14 @@
 ; RUN:                -polly-2nd-level-tile-sizes=16,8 < %s | \
 ; RUN: FileCheck %s --check-prefix=TWO-PLUS-REGISTER
 
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-opt-isl -analyze \
+; RUN:                -polly-2nd-level-tiling -polly-ast \
+; RUN:                -polly-tile-sizes=256,16 -polly-no-early-exit \
+; RUN:                -polly-register-tiling -polly-register-tile-sizes=2,4 \
+; RUN:                -polly-vectorizer=polly \
+; RUN:                -polly-2nd-level-tile-sizes=16,8 < %s | \
+; RUN: FileCheck %s --check-prefix=TWO-PLUS-REGISTER-PLUS-VECTORIZATION
+
 ; CHECK: for (int c0 = 0; c0 <= 3; c0 += 1)
 ; CHECK:   for (int c1 = 0; c1 <= 31; c1 += 1)
 ; CHECK:     for (int c2 = 0; c2 <= 255; c2 += 1)
@@ -46,7 +54,20 @@
 ; TWO-PLUS-REGISTER:             Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 2 * c5 + 1);
 ; TWO-PLUS-REGISTER:           }
 
-
+; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: #pragma known-parallel
+; TWO-PLUS-REGISTER-PLUS-VECTORIZATION: for (int c0 = 0; c0 <= 3; c0 += 1)
+; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:   for (int c1 = 0; c1 <= 31; c1 += 1)
+; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:     for (int c2 = 0; c2 <= 15; c2 += 1)
+; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:       for (int c3 = 0; c3 <= 1; c3 += 1)
+; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:         for (int c4 = 0; c4 <= 7; c4 += 1)
+; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:           for (int c5 = 0; c5 <= 1; c5 += 1) {
+; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:             #pragma simd
+; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:             for (int c8 = 0; c8 <= 3; c8 += 1)
+; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:               Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4, 16 * c1 + 8 * c3 + 4 * c5 + c8);
+; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:             #pragma simd
+; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:             for (int c8 = 0; c8 <= 3; c8 += 1)
+; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:               Stmt_for_body3(256 * c0 + 16 * c2 + 2 * c4 + 1, 16 * c1 + 8 * c3 + 4 * c5 + c8);
+; TWO-PLUS-REGISTER-PLUS-VECTORIZATION:           }
 
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
 




More information about the llvm-commits mailing list