[llvm] f5224d4 - [LoopFusion] Remove unreachable blocks from DT and LI after fusion
Diego Caballero via llvm-commits
llvm-commits at lists.llvm.org
Thu May 7 16:52:16 PDT 2020
Author: Diego Caballero
Date: 2020-05-07T16:44:40-07:00
New Revision: f5224d437eace9593b64ef2eff501df397bce6a8
URL: https://github.com/llvm/llvm-project/commit/f5224d437eace9593b64ef2eff501df397bce6a8
DIFF: https://github.com/llvm/llvm-project/commit/f5224d437eace9593b64ef2eff501df397bce6a8.diff
LOG: [LoopFusion] Remove unreachable blocks from DT and LI after fusion
This patch removes FC0.ExitBlock and FC1GuardBlock from DT and LI
after fusion of guarded loops. They become unreachable and LI
verification failed when they happened to be inside another loop.
Reviewed By: kbarton
Differential Revision: https://reviews.llvm.org/D78679
Added:
llvm/test/Transforms/LoopFusion/double_loop_nest_inner_guard.ll
llvm/test/Transforms/LoopFusion/triple_loop_nest_inner_guard.ll
Modified:
llvm/lib/Transforms/Scalar/LoopFuse.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 8d591d783f2e..e2b65f5dfa5a 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -1536,7 +1536,10 @@ struct LoopFuser {
// Update DT/PDT
DTU.applyUpdates(TreeUpdates);
+ LI.removeBlock(FC1GuardBlock);
LI.removeBlock(FC1.Preheader);
+ LI.removeBlock(FC0.ExitBlock);
+ DTU.deleteBB(FC1GuardBlock);
DTU.deleteBB(FC1.Preheader);
DTU.deleteBB(FC0.ExitBlock);
DTU.flush();
diff --git a/llvm/test/Transforms/LoopFusion/double_loop_nest_inner_guard.ll b/llvm/test/Transforms/LoopFusion/double_loop_nest_inner_guard.ll
new file mode 100644
index 000000000000..d94c2229a0fc
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/double_loop_nest_inner_guard.ll
@@ -0,0 +1,116 @@
+; RUN: opt -S -loop-fusion < %s 2>&1 | FileCheck %s
+
+; Verify that LoopFusion can fuse two double-loop nests with guarded inner
+; loops. Loops are in canonical form.
+
+@a = common global [10 x [10 x i32]] zeroinitializer
+@b = common global [10 x [10 x i32]] zeroinitializer
+@c = common global [10 x [10 x i32]] zeroinitializer
+
+; CHECK-LABEL: @double_loop_nest_inner_guard
+; CHECK: br i1 %{{.*}}, label %[[OUTER_PH:outer1.ph]], label %[[FUNC_EXIT:func_exit]]
+
+; CHECK: [[OUTER_PH]]:
+; CHECK: br label %[[OUTER_BODY_INNER_GUARD:outer1.body.inner.guard]]
+
+; CHECK: [[OUTER_BODY_INNER_GUARD]]:
+; CHECK: br i1 %{{.*}}, label %[[INNER_PH:inner1.ph]], label %[[OUTER_LATCH:outer2.latch]]
+
+; CHECK: [[INNER_PH]]:
+; CHECK-NEXT: br label %[[INNER_BODY:inner1.body]]
+
+; CHECK: [[INNER_BODY]]:
+; First loop body.
+; CHECK: load
+; CHECK: add
+; CHECK: store
+; Second loop body.
+; CHECK: load
+; CHECK: mul
+; CHECK: store
+; CHECK: br i1 %{{.*}}, label %[[INNER_EXIT:inner2.exit]], label %[[INNER_BODY:inner1.body]]
+
+; CHECK: [[INNER_EXIT]]:
+; CHECK-NEXT: br label %[[OUTER_LATCH:outer2.latch]]
+
+; CHECK: [[OUTER_LATCH]]:
+; CHECK: br i1 %{{.*}}, label %[[OUTER_EXIT:outer2.exit]], label %[[OUTER_BODY_INNER_GUARD]]
+
+; CHECK: [[OUTER_EXIT]]:
+; CHECK-NEXT: br label %[[FUNC_EXIT:func_exit]]
+
+; CHECK: [[FUNC_EXIT]]:
+; CHECK-NEXT: ret
+
+define i32 @double_loop_nest_inner_guard(i32 %m, i32 %n, i32 %M, i32 %N) {
+entry:
+ %cmp63 = icmp sgt i32 %m, 0
+ br i1 %cmp63, label %outer1.ph, label %func_exit
+
+outer1.ph:
+ %cmp261 = icmp sgt i32 %n, 0
+ %wide.trip.count76 = zext i32 %m to i64
+ %wide.trip.count72 = zext i32 %n to i64
+ br label %outer1.body.inner.guard
+
+outer1.body.inner.guard:
+ %iv74 = phi i64 [ 0, %outer1.ph ], [ %iv.next75, %outer1.latch ]
+ br i1 %cmp261, label %inner1.ph, label %outer1.latch
+
+inner1.ph:
+ br label %inner1.body
+
+inner1.body:
+ %iv70 = phi i64 [ %iv.next71, %inner1.body ], [ 0, %inner1.ph ]
+ %idx6 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @a, i64 0, i64 %iv74, i64 %iv70
+ %0 = load i32, i32* %idx6
+ %add = add nsw i32 %0, 2
+ %idx10 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @b, i64 0, i64 %iv74, i64 %iv70
+ store i32 %add, i32* %idx10
+ %iv.next71 = add nuw nsw i64 %iv70, 1
+ %exitcond73 = icmp eq i64 %iv.next71, %wide.trip.count72
+ br i1 %exitcond73, label %inner1.exit, label %inner1.body
+
+inner1.exit:
+ br label %outer1.latch
+
+outer1.latch:
+ %iv.next75 = add nuw nsw i64 %iv74, 1
+ %exitcond77 = icmp eq i64 %iv.next75, %wide.trip.count76
+ br i1 %exitcond77, label %outer2.ph, label %outer1.body.inner.guard
+
+outer2.ph:
+ br label %outer2.body.inner.guard
+
+outer2.body.inner.guard:
+ %iv66 = phi i64 [ %iv.next67, %outer2.latch ], [ 0, %outer2.ph ]
+ br i1 %cmp261, label %inner2.ph, label %outer2.latch
+
+inner2.ph:
+ br label %inner2.body
+
+inner2.body:
+ %iv = phi i64 [ %iv.next, %inner2.body ], [ 0, %inner2.ph ]
+ %idx27 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @a, i64 0, i64 %iv66, i64 %iv
+ %1 = load i32, i32* %idx27
+ %mul = shl nsw i32 %1, 1
+ %idx31 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @c, i64 0, i64 %iv66, i64 %iv
+ store i32 %mul, i32* %idx31
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, %wide.trip.count72
+ br i1 %exitcond, label %inner2.exit, label %inner2.body
+
+inner2.exit:
+ br label %outer2.latch
+
+outer2.latch:
+ %iv.next67 = add nuw nsw i64 %iv66, 1
+ %exitcond69 = icmp eq i64 %iv.next67, %wide.trip.count76
+ br i1 %exitcond69, label %outer2.exit, label %outer2.body.inner.guard
+
+outer2.exit:
+ br label %func_exit
+
+func_exit:
+ ret i32 undef
+}
diff --git a/llvm/test/Transforms/LoopFusion/triple_loop_nest_inner_guard.ll b/llvm/test/Transforms/LoopFusion/triple_loop_nest_inner_guard.ll
new file mode 100644
index 000000000000..065b250c0c14
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/triple_loop_nest_inner_guard.ll
@@ -0,0 +1,160 @@
+; RUN: opt -S -loop-fusion < %s 2>&1 | FileCheck %s
+
+; Verify that LoopFusion can fuse two triple-loop nests with guarded inner
+; loops. Loops are in canonical form.
+
+@a = common global [10 x [10 x [10 x i32]]] zeroinitializer
+@b = common global [10 x [10 x [10 x i32]]] zeroinitializer
+@c = common global [10 x [10 x [10 x i32]]] zeroinitializer
+
+; CHECK-LABEL: @triple_loop_nest_inner_guard
+; CHECK: br i1 %{{.*}}, label %[[OUTER_PH:outer1.ph]], label %[[FUNC_EXIT:func_exit]]
+
+; CHECK: [[OUTER_PH]]:
+; CHECK: br label %[[OUTER_BODY_MIDDLE_GUARD:outer1.body.middle1.guard]]
+
+; CHECK: [[OUTER_BODY_MIDDLE_GUARD]]:
+; CHECK: br i1 %{{.*}}, label %[[MIDDLE_PH:middle1.ph]], label %[[OUTER_LATCH:outer2.latch]]
+
+; CHECK: [[MIDDLE_PH]]:
+; CHECK-NEXT: br label %[[MIDDLE_BODY_INNER_GUARD:middle1.body.inner1.guard]]
+
+; CHECK: [[MIDDLE_BODY_INNER_GUARD]]:
+; CHECK: br i1 %{{.*}}, label %[[INNER_PH:inner1.ph]], label %[[MIDDLE_LATCH:middle2.latch]]
+
+; CHECK: [[INNER_PH]]:
+; CHECK-NEXT: br label %[[INNER_BODY:inner1.body]]
+
+; CHECK: [[INNER_BODY]]:
+; First loop body.
+; CHECK: load
+; CHECK: add
+; CHECK: store
+; Second loop body.
+; CHECK: load
+; CHECK: mul
+; CHECK: store
+; CHECK: br i1 %{{.*}}, label %[[INNER_EXIT:inner2.exit]], label %[[INNER_BODY:inner1.body]]
+
+; CHECK: [[INNER_EXIT]]:
+; CHECK-NEXT: br label %[[MIDDLE_LATCH:middle2.latch]]
+
+; CHECK: [[MIDDLE_LATCH]]:
+; CHECK: br i1 %{{.*}}, label %[[MIDDLE_EXIT:middle2.exit]], label %[[MIDDLE_BODY_INNER_GUARD]]
+
+; CHECK: [[MIDDLE_EXIT]]:
+; CHECK-NEXT: br label %[[OUTER_LATCH:outer2.latch]]
+
+; CHECK: [[OUTER_LATCH]]:
+; CHECK: br i1 %{{.*}}, label %[[OUTER_EXIT:outer2.exit]], label %[[OUTER_BODY_MIDDLE_GUARD]]
+
+; CHECK: [[OUTER_EXIT]]:
+; CHECK-NEXT: br label %[[FUNC_EXIT:func_exit]]
+
+; CHECK: [[FUNC_EXIT]]:
+; CHECK-NEXT: ret
+
+define i32 @triple_loop_nest_inner_guard(i32 %m, i32 %n, i32 %M, i32 %N) {
+entry:
+ %cmp101 = icmp sgt i32 %m, 0
+ br i1 %cmp101, label %outer1.ph, label %func_exit
+
+outer1.ph:
+ %cmp298 = icmp sgt i32 %n, 0
+ %cmp696 = icmp sgt i32 %M, 0
+ %wide.trip.count122 = zext i32 %m to i64
+ %wide.trip.count118 = zext i32 %n to i64
+ %wide.trip.count114 = zext i32 %M to i64
+ br label %outer1.body.middle1.guard
+
+outer1.body.middle1.guard:
+ %iv120 = phi i64 [ 0, %outer1.ph ], [ %iv.next121, %outer1.latch ]
+ br i1 %cmp298, label %middle1.ph, label %outer1.latch
+
+middle1.ph:
+ br label %middle1.body.inner1.guard
+
+middle1.body.inner1.guard:
+ %iv116 = phi i64 [ %iv.next117, %middle1.latch ], [ 0, %middle1.ph ]
+ br i1 %cmp696, label %inner1.ph, label %middle1.latch
+
+inner1.ph:
+ br label %inner1.body
+
+inner1.body:
+ %iv112 = phi i64 [ %iv.next113, %inner1.body ], [ 0, %inner1.ph ]
+ %idx12 = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* @a, i64 0, i64 %iv120, i64 %iv116, i64 %iv112
+ %0 = load i32, i32* %idx12
+ %add = add nsw i32 %0, 2
+ %idx18 = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* @b, i64 0, i64 %iv120, i64 %iv116, i64 %iv112
+ store i32 %add, i32* %idx18
+ %iv.next113 = add nuw nsw i64 %iv112, 1
+ %exitcond115 = icmp eq i64 %iv.next113, %wide.trip.count114
+ br i1 %exitcond115, label %inner1.exit, label %inner1.body
+
+inner1.exit:
+ br label %middle1.latch
+
+middle1.latch:
+ %iv.next117 = add nuw nsw i64 %iv116, 1
+ %exitcond119 = icmp eq i64 %iv.next117, %wide.trip.count118
+ br i1 %exitcond119, label %middle1.exit, label %middle1.body.inner1.guard
+
+middle1.exit:
+ br label %outer1.latch
+
+outer1.latch:
+ %iv.next121 = add nuw nsw i64 %iv120, 1
+ %exitcond123 = icmp eq i64 %iv.next121, %wide.trip.count122
+ br i1 %exitcond123, label %outer2.ph, label %outer1.body.middle1.guard
+
+outer2.ph:
+ br label %outer2.middle2.guard
+
+outer2.middle2.guard:
+ %iv108 = phi i64 [ %iv.next109, %outer2.latch ], [ 0, %outer2.ph ]
+ br i1 %cmp298, label %middle2.ph, label %outer2.latch
+
+middle2.ph:
+ br label %middle2.body.inner2.guard
+
+middle2.body.inner2.guard:
+ %iv104 = phi i64 [ %iv.next105, %middle2.latch ], [ 0, %middle2.ph ]
+ br i1 %cmp696, label %inner2.ph, label %middle2.latch
+
+inner2.ph:
+ br label %inner2.body
+
+inner2.body:
+ %iv = phi i64 [ %iv.next, %inner2.body ], [ 0, %inner2.ph ]
+ %idx45 = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* @a, i64 0, i64 %iv108, i64 %iv104, i64 %iv
+ %1 = load i32, i32* %idx45
+ %mul = shl nsw i32 %1, 1
+ %idx51 = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* @c, i64 0, i64 %iv108, i64 %iv104, i64 %iv
+ store i32 %mul, i32* %idx51
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, %wide.trip.count114
+ br i1 %exitcond, label %inner2.exit, label %inner2.body
+
+inner2.exit:
+ br label %middle2.latch
+
+middle2.latch:
+ %iv.next105 = add nuw nsw i64 %iv104, 1
+ %exitcond107 = icmp eq i64 %iv.next105, %wide.trip.count118
+ br i1 %exitcond107, label %middle2.exit, label %middle2.body.inner2.guard
+
+middle2.exit:
+ br label %outer2.latch
+
+outer2.latch:
+ %iv.next109 = add nuw nsw i64 %iv108, 1
+ %exitcond111 = icmp eq i64 %iv.next109, %wide.trip.count122
+ br i1 %exitcond111, label %outer2.exit, label %outer2.middle2.guard
+
+outer2.exit:
+ br label %func_exit
+
+func_exit:
+ ret i32 undef
+}
More information about the llvm-commits mailing list