[polly] r297587 - [ScheduleOptimizer] Allow tiling after fusion

Tobias Grosser via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 12 12:02:31 PDT 2017


Author: grosser
Date: Sun Mar 12 14:02:31 2017
New Revision: 297587

URL: http://llvm.org/viewvc/llvm-project?rev=297587&view=rev
Log:
[ScheduleOptimizer] Allow tiling after fusion

In ScheduleTreeOptimizer::isTileableBandNode(), also allow the case in
which the band node's child is an isl_schedule_node_sequence whose children
are isl_schedule_node_filter nodes that each have an isl_schedule_node_leaf
child. This case can arise when two or more statements are fused by the
isl scheduler.

The tile_after_fusion.ll test has two statements in separate
loop nests and checks whether they are tiled after being fused
when polly-opt-fusion equals "max".

Reviewers: grosser

Subscribers: gareevroman, pollydev

Tags: #polly

Contributed-by: Theodoros Theodoridis <theodort at student.ethz.ch>

Differential Revision: https://reviews.llvm.org/D30815

Added:
    polly/trunk/test/ScheduleOptimizer/tile_after_fusion.ll
Modified:
    polly/trunk/lib/Transform/ScheduleOptimizer.cpp

Modified: polly/trunk/lib/Transform/ScheduleOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/ScheduleOptimizer.cpp?rev=297587&r1=297586&r2=297587&view=diff
==============================================================================
--- polly/trunk/lib/Transform/ScheduleOptimizer.cpp (original)
+++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp Sun Mar 12 14:02:31 2017
@@ -433,6 +433,34 @@ ScheduleTreeOptimizer::applyRegisterTili
   return Node;
 }
 
+namespace {
+bool isSimpleInnermostBand(const isl::schedule_node &Node) {
+  assert(isl_schedule_node_get_type(Node.keep()) == isl_schedule_node_band);
+  assert(isl_schedule_node_n_children(Node.keep()) == 1);
+
+  auto ChildType = isl_schedule_node_get_type(Node.child(0).keep());
+
+  if (ChildType == isl_schedule_node_leaf)
+    return true;
+
+  if (ChildType != isl_schedule_node_sequence)
+    return false;
+
+  auto Sequence = Node.child(0);
+
+  for (int c = 0, nc = isl_schedule_node_n_children(Sequence.keep()); c < nc;
+       ++c) {
+    auto Child = Sequence.child(c);
+    if (isl_schedule_node_get_type(Child.keep()) != isl_schedule_node_filter)
+      return false;
+    if (isl_schedule_node_get_type(Child.child(0).keep()) !=
+        isl_schedule_node_leaf)
+      return false;
+  }
+  return true;
+}
+} // namespace
+
 bool ScheduleTreeOptimizer::isTileableBandNode(
     __isl_keep isl_schedule_node *Node) {
   if (isl_schedule_node_get_type(Node) != isl_schedule_node_band)
@@ -451,14 +479,8 @@ bool ScheduleTreeOptimizer::isTileableBa
   if (Dims <= 1)
     return false;
 
-  auto Child = isl_schedule_node_get_child(Node, 0);
-  auto Type = isl_schedule_node_get_type(Child);
-  isl_schedule_node_free(Child);
-
-  if (Type != isl_schedule_node_leaf)
-    return false;
-
-  return true;
+  auto ManagedNode = isl::manage(isl_schedule_node_copy(Node));
+  return isSimpleInnermostBand(ManagedNode);
 }
 
 __isl_give isl_schedule_node *

Added: polly/trunk/test/ScheduleOptimizer/tile_after_fusion.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/tile_after_fusion.ll?rev=297587&view=auto
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/tile_after_fusion.ll (added)
+++ polly/trunk/test/ScheduleOptimizer/tile_after_fusion.ll Sun Mar 12 14:02:31 2017
@@ -0,0 +1,139 @@
+; RUN: opt %loadPolly -polly-opt-isl -polly-ast -polly-opt-fusion=max -analyze < %s | FileCheck %s
+;
+;
+;    void tf(int C[256][256][256], int A0[256][256][256], int A1[256][256][256]) {
+;      for (int i = 0; i < 256; ++i)
+;        for (int j = 0; j < 256; ++j)
+;          for (int k = 0; k < 256; ++k)
+;            C[i][j][k] += A0[i][j][k];
+;
+;      for (int i = 0; i < 256; ++i)
+;        for (int j = 0; j < 256; ++j)
+;          for (int k = 0; k < 256; ++k)
+;            C[i][j][k] += A1[i][j][k];
+;    }
+;
+; The tile_after_fusion.ll test has two statements in separate loop nests and
+; checks whether they are tiled after being fused when polly-opt-fusion equals
+; "max".
+;
+; CHECK:       1st level tiling - Tiles
+; CHECK-NEXT:     for (int c0 = 0; c0 <= 7; c0 += 1)
+; CHECK-NEXT:       for (int c1 = 0; c1 <= 7; c1 += 1)
+; CHECK-NEXT:         for (int c2 = 0; c2 <= 7; c2 += 1) {
+; CHECK-NEXT:           // 1st level tiling - Points
+; CHECK-NEXT:           for (int c3 = 0; c3 <= 31; c3 += 1)
+; CHECK-NEXT:             for (int c4 = 0; c4 <= 31; c4 += 1)
+; CHECK-NEXT:               for (int c5 = 0; c5 <= 31; c5 += 1) {
+; CHECK-NEXT:                 Stmt_for_body6(32 * c0 + c3, 32 * c1 + c4, 32 * c2 + c5);
+; CHECK-NEXT:                 Stmt_for_body34(32 * c0 + c3, 32 * c1 + c4, 32 * c2 + c5);
+
+source_filename = "tile_after_fusion.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @tf([256 x [256 x i32]]* %C, [256 x [256 x i32]]* %A0, [256 x [256 x i32]]* %A1) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc20, %entry
+  %indvars.iv13 = phi i64 [ %indvars.iv.next14, %for.inc20 ], [ 0, %entry ]
+  %exitcond15 = icmp ne i64 %indvars.iv13, 256
+  br i1 %exitcond15, label %for.body, label %for.end22
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc17, %for.body
+  %indvars.iv10 = phi i64 [ %indvars.iv.next11, %for.inc17 ], [ 0, %for.body ]
+  %exitcond12 = icmp ne i64 %indvars.iv10, 256
+  br i1 %exitcond12, label %for.body3, label %for.end19
+
+for.body3:                                        ; preds = %for.cond1
+  br label %for.cond4
+
+for.cond4:                                        ; preds = %for.inc, %for.body3
+  %indvars.iv7 = phi i64 [ %indvars.iv.next8, %for.inc ], [ 0, %for.body3 ]
+  %exitcond9 = icmp ne i64 %indvars.iv7, 256
+  br i1 %exitcond9, label %for.body6, label %for.end
+
+for.body6:                                        ; preds = %for.cond4
+  %arrayidx10 = getelementptr inbounds [256 x [256 x i32]], [256 x [256 x i32]]* %A0, i64 %indvars.iv13, i64 %indvars.iv10, i64 %indvars.iv7
+  %tmp = load i32, i32* %arrayidx10, align 4
+  %arrayidx16 = getelementptr inbounds [256 x [256 x i32]], [256 x [256 x i32]]* %C, i64 %indvars.iv13, i64 %indvars.iv10, i64 %indvars.iv7
+  %tmp16 = load i32, i32* %arrayidx16, align 4
+  %add = add nsw i32 %tmp16, %tmp
+  store i32 %add, i32* %arrayidx16, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body6
+  %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1
+  br label %for.cond4
+
+for.end:                                          ; preds = %for.cond4
+  br label %for.inc17
+
+for.inc17:                                        ; preds = %for.end
+  %indvars.iv.next11 = add nuw nsw i64 %indvars.iv10, 1
+  br label %for.cond1
+
+for.end19:                                        ; preds = %for.cond1
+  br label %for.inc20
+
+for.inc20:                                        ; preds = %for.end19
+  %indvars.iv.next14 = add nuw nsw i64 %indvars.iv13, 1
+  br label %for.cond
+
+for.end22:                                        ; preds = %for.cond
+  br label %for.cond24
+
+for.cond24:                                       ; preds = %for.inc54, %for.end22
+  %indvars.iv4 = phi i64 [ %indvars.iv.next5, %for.inc54 ], [ 0, %for.end22 ]
+  %exitcond6 = icmp ne i64 %indvars.iv4, 256
+  br i1 %exitcond6, label %for.body26, label %for.end56
+
+for.body26:                                       ; preds = %for.cond24
+  br label %for.cond28
+
+for.cond28:                                       ; preds = %for.inc51, %for.body26
+  %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc51 ], [ 0, %for.body26 ]
+  %exitcond3 = icmp ne i64 %indvars.iv1, 256
+  br i1 %exitcond3, label %for.body30, label %for.end53
+
+for.body30:                                       ; preds = %for.cond28
+  br label %for.cond32
+
+for.cond32:                                       ; preds = %for.inc48, %for.body30
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc48 ], [ 0, %for.body30 ]
+  %exitcond = icmp ne i64 %indvars.iv, 256
+  br i1 %exitcond, label %for.body34, label %for.end50
+
+for.body34:                                       ; preds = %for.cond32
+  %arrayidx40 = getelementptr inbounds [256 x [256 x i32]], [256 x [256 x i32]]* %A1, i64 %indvars.iv4, i64 %indvars.iv1, i64 %indvars.iv
+  %tmp17 = load i32, i32* %arrayidx40, align 4
+  %arrayidx46 = getelementptr inbounds [256 x [256 x i32]], [256 x [256 x i32]]* %C, i64 %indvars.iv4, i64 %indvars.iv1, i64 %indvars.iv
+  %tmp18 = load i32, i32* %arrayidx46, align 4
+  %add47 = add nsw i32 %tmp18, %tmp17
+  store i32 %add47, i32* %arrayidx46, align 4
+  br label %for.inc48
+
+for.inc48:                                        ; preds = %for.body34
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond32
+
+for.end50:                                        ; preds = %for.cond32
+  br label %for.inc51
+
+for.inc51:                                        ; preds = %for.end50
+  %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
+  br label %for.cond28
+
+for.end53:                                        ; preds = %for.cond28
+  br label %for.inc54
+
+for.inc54:                                        ; preds = %for.end53
+  %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
+  br label %for.cond24
+
+for.end56:                                        ; preds = %for.cond24
+  ret void
+}




More information about the llvm-commits mailing list