[llvm] f5224d4 - [LoopFusion] Remove unreachable blocks from DT and LI after fusion

Diego Caballero via llvm-commits llvm-commits at lists.llvm.org
Thu May 7 16:52:16 PDT 2020


Author: Diego Caballero
Date: 2020-05-07T16:44:40-07:00
New Revision: f5224d437eace9593b64ef2eff501df397bce6a8

URL: https://github.com/llvm/llvm-project/commit/f5224d437eace9593b64ef2eff501df397bce6a8
DIFF: https://github.com/llvm/llvm-project/commit/f5224d437eace9593b64ef2eff501df397bce6a8.diff

LOG: [LoopFusion] Remove unreachable blocks from DT and LI after fusion

This patch removes FC0.ExitBlock and FC1GuardBlock from DT and LI
after fusion of guarded loops. They become unreachable and LI
verification failed when they happened to be inside another loop.

Reviewed By: kbarton

Differential Revision: https://reviews.llvm.org/D78679

Added: 
    llvm/test/Transforms/LoopFusion/double_loop_nest_inner_guard.ll
    llvm/test/Transforms/LoopFusion/triple_loop_nest_inner_guard.ll

Modified: 
    llvm/lib/Transforms/Scalar/LoopFuse.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 8d591d783f2e..e2b65f5dfa5a 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -1536,7 +1536,10 @@ struct LoopFuser {
     // Update DT/PDT
     DTU.applyUpdates(TreeUpdates);
 
+    LI.removeBlock(FC1GuardBlock);
     LI.removeBlock(FC1.Preheader);
+    LI.removeBlock(FC0.ExitBlock);
+    DTU.deleteBB(FC1GuardBlock);
     DTU.deleteBB(FC1.Preheader);
     DTU.deleteBB(FC0.ExitBlock);
     DTU.flush();

diff  --git a/llvm/test/Transforms/LoopFusion/double_loop_nest_inner_guard.ll b/llvm/test/Transforms/LoopFusion/double_loop_nest_inner_guard.ll
new file mode 100644
index 000000000000..d94c2229a0fc
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/double_loop_nest_inner_guard.ll
@@ -0,0 +1,116 @@
+; RUN: opt -S -loop-fusion < %s 2>&1 | FileCheck %s
+
+; Verify that LoopFusion can fuse two double-loop nests with guarded inner
+; loops. Loops are in canonical form.
+
+@a = common global [10 x [10 x i32]] zeroinitializer
+@b = common global [10 x [10 x i32]] zeroinitializer
+@c = common global [10 x [10 x i32]] zeroinitializer
+
+; CHECK-LABEL: @double_loop_nest_inner_guard
+; CHECK: br i1 %{{.*}}, label %[[OUTER_PH:outer1.ph]], label %[[FUNC_EXIT:func_exit]]
+
+; CHECK: [[OUTER_PH]]:
+; CHECK: br label %[[OUTER_BODY_INNER_GUARD:outer1.body.inner.guard]]
+
+; CHECK: [[OUTER_BODY_INNER_GUARD]]:
+; CHECK: br i1 %{{.*}}, label %[[INNER_PH:inner1.ph]], label %[[OUTER_LATCH:outer2.latch]]
+
+; CHECK: [[INNER_PH]]:
+; CHECK-NEXT: br label %[[INNER_BODY:inner1.body]]
+
+; CHECK: [[INNER_BODY]]:
+; First loop body.
+; CHECK: load
+; CHECK: add
+; CHECK: store
+; Second loop body.
+; CHECK: load
+; CHECK: mul
+; CHECK: store
+; CHECK: br i1 %{{.*}}, label %[[INNER_EXIT:inner2.exit]], label %[[INNER_BODY:inner1.body]]
+
+; CHECK: [[INNER_EXIT]]:
+; CHECK-NEXT: br label %[[OUTER_LATCH:outer2.latch]]
+
+; CHECK: [[OUTER_LATCH]]:
+; CHECK: br i1 %{{.*}}, label %[[OUTER_EXIT:outer2.exit]], label %[[OUTER_BODY_INNER_GUARD]]
+
+; CHECK: [[OUTER_EXIT]]:
+; CHECK-NEXT: br label %[[FUNC_EXIT:func_exit]]
+
+; CHECK: [[FUNC_EXIT]]:
+; CHECK-NEXT: ret
+
+define i32 @double_loop_nest_inner_guard(i32 %m, i32 %n, i32 %M, i32 %N) {
+entry:
+  %cmp63 = icmp sgt i32 %m, 0
+  br i1 %cmp63, label %outer1.ph, label %func_exit
+
+outer1.ph:
+  %cmp261 = icmp sgt i32 %n, 0
+  %wide.trip.count76 = zext i32 %m to i64
+  %wide.trip.count72 = zext i32 %n to i64
+  br label %outer1.body.inner.guard
+
+outer1.body.inner.guard:
+  %iv74 = phi i64 [ 0, %outer1.ph ], [ %iv.next75, %outer1.latch ]
+  br i1 %cmp261, label %inner1.ph, label %outer1.latch
+
+inner1.ph:
+  br label %inner1.body
+
+inner1.body:
+  %iv70 = phi i64 [ %iv.next71, %inner1.body ], [ 0, %inner1.ph ]
+  %idx6 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @a, i64 0, i64 %iv74, i64 %iv70
+  %0 = load i32, i32* %idx6
+  %add = add nsw i32 %0, 2
+  %idx10 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @b, i64 0, i64 %iv74, i64 %iv70
+  store i32 %add, i32* %idx10
+  %iv.next71 = add nuw nsw i64 %iv70, 1
+  %exitcond73 = icmp eq i64 %iv.next71, %wide.trip.count72
+  br i1 %exitcond73, label %inner1.exit, label %inner1.body
+
+inner1.exit:
+  br label %outer1.latch
+
+outer1.latch:
+  %iv.next75 = add nuw nsw i64 %iv74, 1
+  %exitcond77 = icmp eq i64 %iv.next75, %wide.trip.count76
+  br i1 %exitcond77, label %outer2.ph, label %outer1.body.inner.guard
+
+outer2.ph:
+  br label %outer2.body.inner.guard
+
+outer2.body.inner.guard:
+  %iv66 = phi i64 [ %iv.next67, %outer2.latch ], [ 0, %outer2.ph ]
+  br i1 %cmp261, label %inner2.ph, label %outer2.latch
+
+inner2.ph:
+  br label %inner2.body
+
+inner2.body:
+  %iv = phi i64 [ %iv.next, %inner2.body ], [ 0, %inner2.ph ]
+  %idx27 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @a, i64 0, i64 %iv66, i64 %iv
+  %1 = load i32, i32* %idx27
+  %mul = shl nsw i32 %1, 1
+  %idx31 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @c, i64 0, i64 %iv66, i64 %iv
+  store i32 %mul, i32* %idx31
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %wide.trip.count72
+  br i1 %exitcond, label %inner2.exit, label %inner2.body
+
+inner2.exit:
+  br label %outer2.latch
+
+outer2.latch:
+  %iv.next67 = add nuw nsw i64 %iv66, 1
+  %exitcond69 = icmp eq i64 %iv.next67, %wide.trip.count76
+  br i1 %exitcond69, label %outer2.exit, label %outer2.body.inner.guard
+
+outer2.exit:
+  br label %func_exit
+
+func_exit:
+  ret i32 undef
+}

diff  --git a/llvm/test/Transforms/LoopFusion/triple_loop_nest_inner_guard.ll b/llvm/test/Transforms/LoopFusion/triple_loop_nest_inner_guard.ll
new file mode 100644
index 000000000000..065b250c0c14
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/triple_loop_nest_inner_guard.ll
@@ -0,0 +1,160 @@
+; RUN: opt -S -loop-fusion < %s 2>&1 | FileCheck %s
+
+; Verify that LoopFusion can fuse two triple-loop nests with guarded inner
+; loops. Loops are in canonical form.
+
+@a = common global [10 x [10 x [10 x i32]]] zeroinitializer
+@b = common global [10 x [10 x [10 x i32]]] zeroinitializer
+@c = common global [10 x [10 x [10 x i32]]] zeroinitializer
+
+; CHECK-LABEL: @triple_loop_nest_inner_guard
+; CHECK: br i1 %{{.*}}, label %[[OUTER_PH:outer1.ph]], label %[[FUNC_EXIT:func_exit]]
+
+; CHECK: [[OUTER_PH]]:
+; CHECK: br label %[[OUTER_BODY_MIDDLE_GUARD:outer1.body.middle1.guard]]
+
+; CHECK: [[OUTER_BODY_MIDDLE_GUARD]]:
+; CHECK: br i1 %{{.*}}, label %[[MIDDLE_PH:middle1.ph]], label %[[OUTER_LATCH:outer2.latch]]
+
+; CHECK: [[MIDDLE_PH]]:
+; CHECK-NEXT: br label %[[MIDDLE_BODY_INNER_GUARD:middle1.body.inner1.guard]]
+
+; CHECK: [[MIDDLE_BODY_INNER_GUARD]]:
+; CHECK: br i1 %{{.*}}, label %[[INNER_PH:inner1.ph]], label %[[MIDDLE_LATCH:middle2.latch]]
+
+; CHECK: [[INNER_PH]]:
+; CHECK-NEXT: br label %[[INNER_BODY:inner1.body]]
+
+; CHECK: [[INNER_BODY]]:
+; First loop body.
+; CHECK: load
+; CHECK: add
+; CHECK: store
+; Second loop body.
+; CHECK: load
+; CHECK: mul
+; CHECK: store
+; CHECK: br i1 %{{.*}}, label %[[INNER_EXIT:inner2.exit]], label %[[INNER_BODY:inner1.body]]
+
+; CHECK: [[INNER_EXIT]]:
+; CHECK-NEXT: br label %[[MIDDLE_LATCH:middle2.latch]]
+
+; CHECK: [[MIDDLE_LATCH]]:
+; CHECK: br i1 %{{.*}}, label %[[MIDDLE_EXIT:middle2.exit]], label %[[MIDDLE_BODY_INNER_GUARD]]
+
+; CHECK: [[MIDDLE_EXIT]]:
+; CHECK-NEXT: br label %[[OUTER_LATCH:outer2.latch]]
+
+; CHECK: [[OUTER_LATCH]]:
+; CHECK: br i1 %{{.*}}, label %[[OUTER_EXIT:outer2.exit]], label %[[OUTER_BODY_MIDDLE_GUARD]]
+
+; CHECK: [[OUTER_EXIT]]:
+; CHECK-NEXT: br label %[[FUNC_EXIT:func_exit]]
+
+; CHECK: [[FUNC_EXIT]]:
+; CHECK-NEXT: ret
+
+define i32 @triple_loop_nest_inner_guard(i32 %m, i32 %n, i32 %M, i32 %N) {
+entry:
+  %cmp101 = icmp sgt i32 %m, 0
+  br i1 %cmp101, label %outer1.ph, label %func_exit
+
+outer1.ph:
+  %cmp298 = icmp sgt i32 %n, 0
+  %cmp696 = icmp sgt i32 %M, 0
+  %wide.trip.count122 = zext i32 %m to i64
+  %wide.trip.count118 = zext i32 %n to i64
+  %wide.trip.count114 = zext i32 %M to i64
+  br label %outer1.body.middle1.guard
+
+outer1.body.middle1.guard:
+  %iv120 = phi i64 [ 0, %outer1.ph ], [ %iv.next121, %outer1.latch ]
+  br i1 %cmp298, label %middle1.ph, label %outer1.latch
+
+middle1.ph:
+  br label %middle1.body.inner1.guard
+
+middle1.body.inner1.guard:
+  %iv116 = phi i64 [ %iv.next117, %middle1.latch ], [ 0, %middle1.ph ]
+  br i1 %cmp696, label %inner1.ph, label %middle1.latch
+
+inner1.ph:
+  br label %inner1.body
+
+inner1.body:
+  %iv112 = phi i64 [ %iv.next113, %inner1.body ], [ 0, %inner1.ph ]
+  %idx12 = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* @a, i64 0, i64 %iv120, i64 %iv116, i64 %iv112
+  %0 = load i32, i32* %idx12
+  %add = add nsw i32 %0, 2
+  %idx18 = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* @b, i64 0, i64 %iv120, i64 %iv116, i64 %iv112
+  store i32 %add, i32* %idx18
+  %iv.next113 = add nuw nsw i64 %iv112, 1
+  %exitcond115 = icmp eq i64 %iv.next113, %wide.trip.count114
+  br i1 %exitcond115, label %inner1.exit, label %inner1.body
+
+inner1.exit:
+  br label %middle1.latch
+
+middle1.latch:
+  %iv.next117 = add nuw nsw i64 %iv116, 1
+  %exitcond119 = icmp eq i64 %iv.next117, %wide.trip.count118
+  br i1 %exitcond119, label %middle1.exit, label %middle1.body.inner1.guard
+
+middle1.exit:
+  br label %outer1.latch
+
+outer1.latch:
+  %iv.next121 = add nuw nsw i64 %iv120, 1
+  %exitcond123 = icmp eq i64 %iv.next121, %wide.trip.count122
+  br i1 %exitcond123, label %outer2.ph, label %outer1.body.middle1.guard
+
+outer2.ph:
+  br label %outer2.middle2.guard
+
+outer2.middle2.guard:
+  %iv108 = phi i64 [ %iv.next109, %outer2.latch ], [ 0, %outer2.ph ]
+  br i1 %cmp298, label %middle2.ph, label %outer2.latch
+
+middle2.ph:
+  br label %middle2.body.inner2.guard
+
+middle2.body.inner2.guard:
+  %iv104 = phi i64 [ %iv.next105, %middle2.latch ], [ 0, %middle2.ph ]
+  br i1 %cmp696, label %inner2.ph, label %middle2.latch
+
+inner2.ph:
+  br label %inner2.body
+
+inner2.body:
+  %iv = phi i64 [ %iv.next, %inner2.body ], [ 0, %inner2.ph ]
+  %idx45 = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* @a, i64 0, i64 %iv108, i64 %iv104, i64 %iv
+  %1 = load i32, i32* %idx45
+  %mul = shl nsw i32 %1, 1
+  %idx51 = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* @c, i64 0, i64 %iv108, i64 %iv104, i64 %iv
+  store i32 %mul, i32* %idx51
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %wide.trip.count114
+  br i1 %exitcond, label %inner2.exit, label %inner2.body
+
+inner2.exit:
+  br label %middle2.latch
+
+middle2.latch:
+  %iv.next105 = add nuw nsw i64 %iv104, 1
+  %exitcond107 = icmp eq i64 %iv.next105, %wide.trip.count118
+  br i1 %exitcond107, label %middle2.exit, label %middle2.body.inner2.guard
+
+middle2.exit:
+  br label %outer2.latch
+
+outer2.latch:
+  %iv.next109 = add nuw nsw i64 %iv108, 1
+  %exitcond111 = icmp eq i64 %iv.next109, %wide.trip.count122
+  br i1 %exitcond111, label %outer2.exit, label %outer2.middle2.guard
+
+outer2.exit:
+  br label %func_exit
+
+func_exit:
+  ret i32 undef
+}


        


More information about the llvm-commits mailing list