[polly] 937b00a - [Polly][SchedOpt] Account for prevectorization of multiple statements.
Michael Kruse via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 23 12:06:47 PST 2021
Author: Michael Kruse
Date: 2021-12-23T14:06:41-06:00
New Revision: 937b00ab2cf05d0eba7a96fe7b14d277996e663e
URL: https://github.com/llvm/llvm-project/commit/937b00ab2cf05d0eba7a96fe7b14d277996e663e
DIFF: https://github.com/llvm/llvm-project/commit/937b00ab2cf05d0eba7a96fe7b14d277996e663e.diff
LOG: [Polly][SchedOpt] Account for prevectorization of multiple statements.
A prevectorized loop may contain multiple statements, in which case
isl_schedule_node_band_sink will sink the vector band to multiple
leaves. Instead of statically assuming a specific tree structure after
sinking, add a SIMD marker to all inner bands.
Fixes llvm.org/PR52637
Added:
polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll
Modified:
polly/include/polly/ScheduleTreeTransform.h
polly/lib/Transform/ScheduleOptimizer.cpp
polly/lib/Transform/ScheduleTreeTransform.cpp
polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll
Removed:
################################################################################
diff --git a/polly/include/polly/ScheduleTreeTransform.h b/polly/include/polly/ScheduleTreeTransform.h
index 5ed0c64ad3f86..b35da47f59cb8 100644
--- a/polly/include/polly/ScheduleTreeTransform.h
+++ b/polly/include/polly/ScheduleTreeTransform.h
@@ -154,6 +154,39 @@ struct RecursiveScheduleTreeVisitor
}
};
+/// Recursively visit all nodes of a schedule tree while allowing changes.
+///
+/// The visit methods return an isl::schedule_node that is used to continue
+/// visiting the tree. Structural changes such as returning a different node
+/// will confuse the visitor.
+template <typename Derived, typename... Args>
+struct ScheduleNodeRewriter
+ : public RecursiveScheduleTreeVisitor<Derived, isl::schedule_node,
+ Args...> {
+ Derived &getDerived() { return *static_cast<Derived *>(this); }
+ const Derived &getDerived() const {
+ return *static_cast<const Derived *>(this);
+ }
+
+ isl::schedule_node visitNode(isl::schedule_node Node, Args... args) {
+ return getDerived().visitChildren(Node);
+ }
+
+ isl::schedule_node visitChildren(isl::schedule_node Node, Args... args) {
+ if (!Node.has_children())
+ return Node;
+
+ isl::schedule_node It = Node.first_child();
+ while (true) {
+ It = getDerived().visit(It, std::forward<Args>(args)...);
+ if (!It.has_next_sibling())
+ break;
+ It = It.next_sibling();
+ }
+ return It.parent();
+ }
+};
+
/// Is this node the marker for its parent band?
bool isBandMark(const isl::schedule_node &Node);
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp
index 03878d5c8e4ba..0a6461139542d 100644
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp
@@ -384,6 +384,19 @@ ScheduleTreeOptimizer::isolateFullPartialTiles(isl::schedule_node Node,
return Result;
}
+struct InsertSimdMarkers : public ScheduleNodeRewriter<InsertSimdMarkers> {
+ isl::schedule_node visitBand(isl::schedule_node_band Band) {
+ isl::schedule_node Node = visitChildren(Band);
+
+ // Only add SIMD markers to innermost bands.
+ if (!Node.first_child().isa<isl::schedule_node_leaf>())
+ return Node;
+
+ isl::id LoopMarker = isl::id::alloc(Band.ctx(), "SIMD", nullptr);
+ return Band.insert_mark(LoopMarker);
+ }
+};
+
isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand(
isl::schedule_node Node, unsigned DimToVectorize, int VectorWidth) {
assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band);
@@ -408,16 +421,19 @@ isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand(
Node = Node.child(0);
// Make sure the "trivially vectorizable loop" is not unrolled. Otherwise,
// we will have troubles to match it in the backend.
- isl::schedule_node_band NodeBand =
- Node.as<isl::schedule_node_band>().set_ast_build_options(
- isl::union_set(Node.ctx(), "{ unroll[x]: 1 = 0 }"));
- Node = isl::manage(isl_schedule_node_band_sink(NodeBand.release()));
- Node = Node.child(0);
- if (isl_schedule_node_get_type(Node.get()) == isl_schedule_node_leaf)
- Node = Node.parent();
- auto LoopMarker = isl::id::alloc(Node.ctx(), "SIMD", nullptr);
+ Node = Node.as<isl::schedule_node_band>().set_ast_build_options(
+ isl::union_set(Node.ctx(), "{ unroll[x]: 1 = 0 }"));
+
+ // Sink the inner loop into the smallest possible statements to make them
+ // represent a single vector instruction if possible.
+ Node = isl::manage(isl_schedule_node_band_sink(Node.release()));
+
+ // Add SIMD markers to those vector statements.
+ InsertSimdMarkers SimdMarkerInserter;
+ Node = SimdMarkerInserter.visit(Node);
+
PrevectOpts++;
- return Node.insert_mark(LoopMarker);
+ return Node.parent();
}
static bool isSimpleInnermostBand(const isl::schedule_node &Node) {
diff --git a/polly/lib/Transform/ScheduleTreeTransform.cpp b/polly/lib/Transform/ScheduleTreeTransform.cpp
index a2cb538021fbe..01f18eadb4d9d 100644
--- a/polly/lib/Transform/ScheduleTreeTransform.cpp
+++ b/polly/lib/Transform/ScheduleTreeTransform.cpp
@@ -118,35 +118,6 @@ static isl::schedule rebuildBand(isl::schedule_node_band OldBand,
return NewBand.get_schedule();
}
-/// Recursively visit all nodes of a schedule tree while allowing changes.
-///
-/// The visit methods return an isl::schedule_node that is used to continue
-/// visiting the tree. Structural changes such as returning a different node
-/// will confuse the visitor.
-template <typename Derived, typename... Args>
-struct ScheduleNodeRewriter
- : public RecursiveScheduleTreeVisitor<Derived, isl::schedule_node,
- Args...> {
- Derived &getDerived() { return *static_cast<Derived *>(this); }
- const Derived &getDerived() const {
- return *static_cast<const Derived *>(this);
- }
-
- isl::schedule_node visitNode(const isl::schedule_node &Node, Args... args) {
- if (!Node.has_children())
- return Node;
-
- isl::schedule_node It = Node.first_child();
- while (true) {
- It = getDerived().visit(It, std::forward<Args>(args)...);
- if (!It.has_next_sibling())
- break;
- It = It.next_sibling();
- }
- return It.parent();
- }
-};
-
/// Rewrite a schedule tree by reconstructing it bottom-up.
///
/// By default, the original schedule tree is reconstructed. To build a
diff --git a/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll b/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll
index 0c0bc12eb4cd4..9d0c6b5b9479f 100644
--- a/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll
+++ b/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll
@@ -80,15 +80,17 @@ cleanup: ; preds = %for.cond, %entry
; CHECK: schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i0) mod 32)]; Stmt_for_body23[i0, i1] -> [((i0) mod 32)] }]"
; CHECK: permutable: 1
; CHECK: child:
-; CHECK: mark: "SIMD"
-; CHECK: child:
-; CHECK: sequence:
-; CHECK: - filter: "[call15] -> { Stmt_for_body23[i0, i1] }"
+; CHECK: sequence:
+; CHECK: - filter: "[call15] -> { Stmt_for_body23[i0, i1] }"
+; CHECK: child:
+; CHECK: mark: "SIMD"
; CHECK: child:
; CHECK: schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i1) mod 4)]; Stmt_for_body23[i0, i1] -> [((i1) mod 4)] }]"
; CHECK: permutable: 1
; CHECK: coincident: [ 1 ]
-; CHECK: - filter: "[call15] -> { Stmt_for_body30[i0, i1] }"
+; CHECK: - filter: "[call15] -> { Stmt_for_body30[i0, i1] }"
+; CHECK: child:
+; CHECK: mark: "SIMD"
; CHECK: child:
; CHECK: schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i1) mod 4)]; Stmt_for_body23[i0, i1] -> [((i1) mod 4)] }]"
; CHECK: permutable: 1
diff --git a/polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll b/polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll
new file mode 100644
index 0000000000000..3bd1f98385004
--- /dev/null
+++ b/polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll
@@ -0,0 +1,45 @@
+; RUN: opt %loadPolly -polly-vectorizer=stripmine -polly-isl-arg=--no-schedule-serialize-sccs -polly-tiling=0 -polly-opt-isl -analyze - < %s | FileCheck %s
+
+; isl_schedule_node_band_sink may sink into multiple children.
+; https://llvm.org/PR52637
+
+%struct.v4l2_sliced_vbi_data = type { [48 x i8] }
+
+define void @vivid_vbi_gen_sliced() {
+entry:
+ br label %for.body
+
+for.body: ; preds = %vivid_vbi_gen_teletext.exit, %entry
+ %i.015 = phi i32 [ 0, %entry ], [ %inc, %vivid_vbi_gen_teletext.exit ]
+ %data0.014 = phi %struct.v4l2_sliced_vbi_data* [ null, %entry ], [ %incdec.ptr, %vivid_vbi_gen_teletext.exit ]
+ %arraydecay = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 0
+ %arrayidx.i = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 6
+ %0 = load i8, i8* %arrayidx.i, align 1
+ store i8 %0, i8* %arraydecay, align 1
+ br label %for.body.for.body_crit_edge.i
+
+for.body.for.body_crit_edge.i: ; preds = %for.body.for.body_crit_edge.i, %for.body
+ %inc10.i13 = phi i32 [ 1, %for.body ], [ %inc10.i, %for.body.for.body_crit_edge.i ]
+ %arrayidx2.phi.trans.insert.i = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 %inc10.i13
+ store i8 0, i8* %arrayidx2.phi.trans.insert.i, align 1
+ %inc10.i = add nuw nsw i32 %inc10.i13, 1
+ %exitcond.not.i = icmp eq i32 %inc10.i13, 42
+ br i1 %exitcond.not.i, label %vivid_vbi_gen_teletext.exit, label %for.body.for.body_crit_edge.i
+
+vivid_vbi_gen_teletext.exit: ; preds = %for.body.for.body_crit_edge.i
+ %incdec.ptr = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 1
+ %inc = add nuw nsw i32 %i.015, 1
+ %exitcond.not = icmp eq i32 %i.015, 1
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end: ; preds = %vivid_vbi_gen_teletext.exit
+ ret void
+}
+
+
+; CHECK: schedule:
+; CHECK: schedule:
+; CHECK: mark: "SIMD"
+; CHECK: schedule:
+; CHECK: mark: "SIMD"
+; CHECK: schedule:
More information about the llvm-commits
mailing list