[polly] r275418 - GPGPU: Use a tile size of 32 by default

Thu Jul 14 07:14:02 PDT 2016

Author: grosser
Date: Thu Jul 14 09:14:02 2016
New Revision: 275418

URL: http://llvm.org/viewvc/llvm-project?rev=275418&view=rev
Log:
GPGPU: Use a tile size of 32 by default

The tile size was previously uninitialized. As a result, it was often zero (i.e.,
no tiling), which is not what we want in general. More importantly, there was
a risk of arbitrary tile sizes being chosen, which we did not observe, but
which is still highly problematic.

Modified:
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
    polly/trunk/test/GPGPU/double-parallel-loop.ll

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=275418&r1=275417&r2=275418&view=diff
==============================================================================

--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Thu Jul 14 09:14:02 2016
@@ -75,6 +75,8 @@ public:
     Options->ctx = nullptr;
     Options->sizes = nullptr;
 
+    Options->tile_size = 32;
+
     Options->use_private_memory = false;
     Options->use_shared_memory = false;
     Options->max_shared_memory = 0;

Modified: polly/trunk/test/GPGPU/double-parallel-loop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/double-parallel-loop.ll?rev=275418&r1=275417&r2=275418&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/double-parallel-loop.ll (original)
+++ polly/trunk/test/GPGPU/double-parallel-loop.ll Thu Jul 14 09:14:02 2016
@@ -29,22 +29,21 @@
 ; SCHED:           child:
 ; SCHED:             mark: "kernel"
 ; SCHED:             child:
-; SCHED:               context: "[b0, b1, t0, t1] -> { [] : 0 <= b0 <= 255 and 0 <= b1 <= 255 and 0 <= t0 <= 3 and 0 <= t1 <= 3 }"
+; SCHED:               context: "[b0, b1, t0, t1] -> { [] : 0 <= b0 <= 31 and 0 <= b1 <= 31 and 0 <= t0 <= 31 and 0 <= t1 <= 15 }"
 ; SCHED:               child:
-; SCHED:                 filter: "[b0, b1] -> { Stmt_bb5[i0, i1] : -3 - 4b0 + i0 <= 1024*floor((i0)/1024) <= -4b0 + i0 and -3 - 4b1 + i1 <= 1024*floor((i1)/1024) <= -4b1 + i1 }"
+; SCHED:                 filter: "[b0, b1] -> { Stmt_bb5[i0, i1] : -31 - 32b0 + i0 <= 8192*floor((i0)/8192) <= -32b0 + i0 and -31 - 32b1 + i1 <= 8192*floor((i1)/8192) <= -32b1 + i1 }"
 ; SCHED:                 child:
-; SCHED:                   schedule: "[{ Stmt_bb5[i0, i1] -> [(floor((i0)/1024))] }, { Stmt_bb5[i0, i1] -> [(floor((i1)/1024))] }]"
+; SCHED:                   schedule: "[{ Stmt_bb5[i0, i1] -> [(floor((i0)/8192))] }, { Stmt_bb5[i0, i1] -> [(floor((i1)/8192))] }]"
 ; SCHED:                   permutable: 1
 ; SCHED:                   coincident: [ 1, 1 ]
 ; SCHED:                   child:
-; SCHED:                     filter: "[t0, t1] -> { Stmt_bb5[i0, i1] : 4*floor((-t0 + i0)/4) = -t0 + i0 and 4*floor((-t1 + i1)/4) = -t1 + i1 and 0 <= t0 <= 3 and 0 <= t1 <= 3 }"
+; SCHED:                     filter: "[t0, t1] -> { Stmt_bb5[i0, i1] : 32*floor((-t0 + i0)/32) = -t0 + i0 and 16*floor((-t1 + i1)/16) = -t1 + i1 and 0 <= t0 <= 31 and 0 <= t1 <= 15 }"
 ; SCHED:                     child:
-; SCHED:                       schedule: "[{ Stmt_bb5[i0, i1] -> [(0)] }, { Stmt_bb5[i0, i1] -> [(0)] }]"
+; SCHED:                       schedule: "[{ Stmt_bb5[i0, i1] -> [(0)] }, { Stmt_bb5[i0, i1] -> [(floor((i1)/16) - 2*floor((i1)/32))] }]"
 ; SCHED:                       permutable: 1
 ; SCHED:                       coincident: [ 1, 1 ]
 ; SCHED:       - filter: "{  }"
 
-
 ;    void double_parallel_loop(float A[][1024]) {
 ;      for (long i = 0; i < 1024; i++)
 ;        for (long j = 0; j < 1024; j++)