[polly] 937b00a - [Polly][SchedOpt] Account for prevectorization of multiple statements.

Michael Kruse via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 23 12:06:47 PST 2021


Author: Michael Kruse
Date: 2021-12-23T14:06:41-06:00
New Revision: 937b00ab2cf05d0eba7a96fe7b14d277996e663e

URL: https://github.com/llvm/llvm-project/commit/937b00ab2cf05d0eba7a96fe7b14d277996e663e
DIFF: https://github.com/llvm/llvm-project/commit/937b00ab2cf05d0eba7a96fe7b14d277996e663e.diff

LOG: [Polly][SchedOpt] Account for prevectorization of multiple statements.

A prevectorized loop may contain multiple statements, in which case
isl_schedule_node_band_sink will sink the vector band to multiple
leaves. Instead of statically assuming a specific tree structure after
sinking, add a SIMD marker to all inner bands.

Fixes llvm.org/PR52637

Added: 
    polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll

Modified: 
    polly/include/polly/ScheduleTreeTransform.h
    polly/lib/Transform/ScheduleOptimizer.cpp
    polly/lib/Transform/ScheduleTreeTransform.cpp
    polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll

Removed: 
    


################################################################################
diff --git a/polly/include/polly/ScheduleTreeTransform.h b/polly/include/polly/ScheduleTreeTransform.h
index 5ed0c64ad3f86..b35da47f59cb8 100644
--- a/polly/include/polly/ScheduleTreeTransform.h
+++ b/polly/include/polly/ScheduleTreeTransform.h
@@ -154,6 +154,39 @@ struct RecursiveScheduleTreeVisitor
   }
 };
 
+/// Recursively visit all nodes of a schedule tree while allowing changes.
+///
+/// The visit methods return an isl::schedule_node that is used to continue
+/// visiting the tree. Structural changes such as returning a different node
+/// will confuse the visitor.
+template <typename Derived, typename... Args>
+struct ScheduleNodeRewriter
+    : public RecursiveScheduleTreeVisitor<Derived, isl::schedule_node,
+                                          Args...> {
+  Derived &getDerived() { return *static_cast<Derived *>(this); }
+  const Derived &getDerived() const {
+    return *static_cast<const Derived *>(this);
+  }
+
+  isl::schedule_node visitNode(isl::schedule_node Node, Args... args) {
+    return getDerived().visitChildren(Node);
+  }
+
+  isl::schedule_node visitChildren(isl::schedule_node Node, Args... args) {
+    if (!Node.has_children())
+      return Node;
+
+    isl::schedule_node It = Node.first_child();
+    while (true) {
+      It = getDerived().visit(It, std::forward<Args>(args)...);
+      if (!It.has_next_sibling())
+        break;
+      It = It.next_sibling();
+    }
+    return It.parent();
+  }
+};
+
 /// Is this node the marker for its parent band?
 bool isBandMark(const isl::schedule_node &Node);
 

diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp
index 03878d5c8e4ba..0a6461139542d 100644
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp
@@ -384,6 +384,19 @@ ScheduleTreeOptimizer::isolateFullPartialTiles(isl::schedule_node Node,
   return Result;
 }
 
+struct InsertSimdMarkers : public ScheduleNodeRewriter<InsertSimdMarkers> {
+  isl::schedule_node visitBand(isl::schedule_node_band Band) {
+    isl::schedule_node Node = visitChildren(Band);
+
+    // Only add SIMD markers to innermost bands.
+    if (!Node.first_child().isa<isl::schedule_node_leaf>())
+      return Node;
+
+    isl::id LoopMarker = isl::id::alloc(Band.ctx(), "SIMD", nullptr);
+    return Band.insert_mark(LoopMarker);
+  }
+};
+
 isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand(
     isl::schedule_node Node, unsigned DimToVectorize, int VectorWidth) {
   assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band);
@@ -408,16 +421,19 @@ isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand(
   Node = Node.child(0);
   // Make sure the "trivially vectorizable loop" is not unrolled. Otherwise,
   // we will have troubles to match it in the backend.
-  isl::schedule_node_band NodeBand =
-      Node.as<isl::schedule_node_band>().set_ast_build_options(
-          isl::union_set(Node.ctx(), "{ unroll[x]: 1 = 0 }"));
-  Node = isl::manage(isl_schedule_node_band_sink(NodeBand.release()));
-  Node = Node.child(0);
-  if (isl_schedule_node_get_type(Node.get()) == isl_schedule_node_leaf)
-    Node = Node.parent();
-  auto LoopMarker = isl::id::alloc(Node.ctx(), "SIMD", nullptr);
+  Node = Node.as<isl::schedule_node_band>().set_ast_build_options(
+      isl::union_set(Node.ctx(), "{ unroll[x]: 1 = 0 }"));
+
+  // Sink the inner loop into the smallest possible statements to make them
+  // represent a single vector instruction if possible.
+  Node = isl::manage(isl_schedule_node_band_sink(Node.release()));
+
+  // Add SIMD markers to those vector statements.
+  InsertSimdMarkers SimdMarkerInserter;
+  Node = SimdMarkerInserter.visit(Node);
+
   PrevectOpts++;
-  return Node.insert_mark(LoopMarker);
+  return Node.parent();
 }
 
 static bool isSimpleInnermostBand(const isl::schedule_node &Node) {

diff --git a/polly/lib/Transform/ScheduleTreeTransform.cpp b/polly/lib/Transform/ScheduleTreeTransform.cpp
index a2cb538021fbe..01f18eadb4d9d 100644
--- a/polly/lib/Transform/ScheduleTreeTransform.cpp
+++ b/polly/lib/Transform/ScheduleTreeTransform.cpp
@@ -118,35 +118,6 @@ static isl::schedule rebuildBand(isl::schedule_node_band OldBand,
   return NewBand.get_schedule();
 }
 
-/// Recursively visit all nodes of a schedule tree while allowing changes.
-///
-/// The visit methods return an isl::schedule_node that is used to continue
-/// visiting the tree. Structural changes such as returning a different node
-/// will confuse the visitor.
-template <typename Derived, typename... Args>
-struct ScheduleNodeRewriter
-    : public RecursiveScheduleTreeVisitor<Derived, isl::schedule_node,
-                                          Args...> {
-  Derived &getDerived() { return *static_cast<Derived *>(this); }
-  const Derived &getDerived() const {
-    return *static_cast<const Derived *>(this);
-  }
-
-  isl::schedule_node visitNode(const isl::schedule_node &Node, Args... args) {
-    if (!Node.has_children())
-      return Node;
-
-    isl::schedule_node It = Node.first_child();
-    while (true) {
-      It = getDerived().visit(It, std::forward<Args>(args)...);
-      if (!It.has_next_sibling())
-        break;
-      It = It.next_sibling();
-    }
-    return It.parent();
-  }
-};
-
 /// Rewrite a schedule tree by reconstructing it bottom-up.
 ///
 /// By default, the original schedule tree is reconstructed. To build a

diff --git a/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll b/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll
index 0c0bc12eb4cd4..9d0c6b5b9479f 100644
--- a/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll
+++ b/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll
@@ -80,15 +80,17 @@ cleanup:                                          ; preds = %for.cond, %entry
 ; CHECK:           schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i0) mod 32)]; Stmt_for_body23[i0, i1] -> [((i0) mod 32)] }]"
 ; CHECK:           permutable: 1
 ; CHECK:           child:
-; CHECK:             mark: "SIMD"
-; CHECK:             child:
-; CHECK:               sequence:
-; CHECK:               - filter: "[call15] -> { Stmt_for_body23[i0, i1] }"
+; CHECK:             sequence:
+; CHECK:             - filter: "[call15] -> { Stmt_for_body23[i0, i1] }"
+; CHECK:               child:
+; CHECK:                 mark: "SIMD"
 ; CHECK:                 child:
 ; CHECK:                   schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i1) mod 4)]; Stmt_for_body23[i0, i1] -> [((i1) mod 4)] }]"
 ; CHECK:                   permutable: 1
 ; CHECK:                   coincident: [ 1 ]
-; CHECK:               - filter: "[call15] -> { Stmt_for_body30[i0, i1] }"
+; CHECK:             - filter: "[call15] -> { Stmt_for_body30[i0, i1] }"
+; CHECK:               child:
+; CHECK:                 mark: "SIMD"
 ; CHECK:                 child:
 ; CHECK:                   schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i1) mod 4)]; Stmt_for_body23[i0, i1] -> [((i1) mod 4)] }]"
 ; CHECK:                   permutable: 1

diff --git a/polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll b/polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll
new file mode 100644
index 0000000000000..3bd1f98385004
--- /dev/null
+++ b/polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll
@@ -0,0 +1,45 @@
+; RUN: opt %loadPolly -polly-vectorizer=stripmine -polly-isl-arg=--no-schedule-serialize-sccs -polly-tiling=0 -polly-opt-isl -analyze - < %s | FileCheck %s
+
+; isl_schedule_node_band_sink may sink into multiple children.
+; https://llvm.org/PR52637
+
+%struct.v4l2_sliced_vbi_data = type { [48 x i8] }
+
+define void @vivid_vbi_gen_sliced() {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %vivid_vbi_gen_teletext.exit, %entry
+  %i.015 = phi i32 [ 0, %entry ], [ %inc, %vivid_vbi_gen_teletext.exit ]
+  %data0.014 = phi %struct.v4l2_sliced_vbi_data* [ null, %entry ], [ %incdec.ptr, %vivid_vbi_gen_teletext.exit ]
+  %arraydecay = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 0
+  %arrayidx.i = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 6
+  %0 = load i8, i8* %arrayidx.i, align 1
+  store i8 %0, i8* %arraydecay, align 1
+  br label %for.body.for.body_crit_edge.i
+
+for.body.for.body_crit_edge.i:                    ; preds = %for.body.for.body_crit_edge.i, %for.body
+  %inc10.i13 = phi i32 [ 1, %for.body ], [ %inc10.i, %for.body.for.body_crit_edge.i ]
+  %arrayidx2.phi.trans.insert.i = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 %inc10.i13
+  store i8 0, i8* %arrayidx2.phi.trans.insert.i, align 1
+  %inc10.i = add nuw nsw i32 %inc10.i13, 1
+  %exitcond.not.i = icmp eq i32 %inc10.i13, 42
+  br i1 %exitcond.not.i, label %vivid_vbi_gen_teletext.exit, label %for.body.for.body_crit_edge.i
+
+vivid_vbi_gen_teletext.exit:                      ; preds = %for.body.for.body_crit_edge.i
+  %incdec.ptr = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 1
+  %inc = add nuw nsw i32 %i.015, 1
+  %exitcond.not = icmp eq i32 %i.015, 1
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %vivid_vbi_gen_teletext.exit
+  ret void
+}
+
+
+; CHECK: schedule:
+; CHECK:   schedule:
+; CHECK:     mark: "SIMD"
+; CHECK:       schedule:
+; CHECK:     mark: "SIMD"
+; CHECK:       schedule:


        


More information about the llvm-commits mailing list