[llvm] 8393b9f - [LoopInterchange] Move instructions from preheader to outer loop header.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 10 04:45:53 PDT 2020


Author: Florian Hahn
Date: 2020-08-10T12:41:33+01:00
New Revision: 8393b9fd1f36d9273fa0720872e3996495aacc1c

URL: https://github.com/llvm/llvm-project/commit/8393b9fd1f36d9273fa0720872e3996495aacc1c
DIFF: https://github.com/llvm/llvm-project/commit/8393b9fd1f36d9273fa0720872e3996495aacc1c.diff

LOG: [LoopInterchange] Move instructions from preheader to outer loop header.

Instructions defined in the original inner loop preheader may depend on
values defined in the outer loop header, but the inner loop header will
become the entry block in the loop nest. Move the instructions from the
preheader to the outer loop header, so we do not break dominance. We
also have to check for unsafe instructions in the preheader. If there
are no unsafe instructions, all instructions should be movable.

Currently we move all instructions except the terminator and rely on
LICM to hoist out invariant instructions later.

Fixes PR45743

Added: 
    llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll

Modified: 
    llvm/lib/Transforms/Scalar/LoopInterchange.cpp
    llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index cf783e947a1c..973194482ab5 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -625,6 +625,13 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
       containsUnsafeInstructions(OuterLoopLatch))
     return false;
 
+  // Also make sure the inner loop preheader does not contain any unsafe
+  // instructions. Note that all instructions in the preheader will be moved to
+  // the outer loop header when interchanging.
+  if (InnerLoopPreHeader != OuterLoopHeader &&
+      containsUnsafeInstructions(InnerLoopPreHeader))
+    return false;
+
   LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
   // We have a perfect loop nest.
   return true;
@@ -1306,6 +1313,21 @@ bool LoopInterchangeTransform::transform() {
     LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
   }
 
+  // Instructions in the original inner loop preheader may depend on values
+  // defined in the outer loop header. Move them there, because the original
+  // inner loop preheader will become the entry into the interchanged loop nest.
+  // Currently we move all instructions and rely on LICM to move invariant
+  // instructions outside the loop nest.
+  BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+  BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
+  if (InnerLoopPreHeader != OuterLoopHeader) {
+    SmallPtrSet<Instruction *, 4> NeedsMoving;
+    for (Instruction &I :
+         make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
+                                         std::prev(InnerLoopPreHeader->end()))))
+      I.moveBefore(OuterLoopHeader->getTerminator());
+  }
+
   Transformed |= adjustLoopLinks();
   if (!Transformed) {
     LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");

diff  --git a/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll b/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
index 57cabfc9bed6..3205a5465817 100644
--- a/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
+++ b/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
@@ -20,11 +20,11 @@ define void @lcssa_08(i32 %n, i32 %m) {
 ; CHECK-NEXT:    [[CMP24:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP24]], label [[INNER_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; CHECK:       outer.preheader:
-; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M:%.*]] to i64
 ; CHECK-NEXT:    br label [[OUTER_HEADER:%.*]]
 ; CHECK:       outer.header:
 ; CHECK-NEXT:    [[INDVARS_IV27:%.*]] = phi i64 [ 0, [[OUTER_PREHEADER:%.*]] ], [ [[INDVARS_IV_NEXT28:%.*]], [[OUTER_LATCH:%.*]] ]
-; CHECK-NEXT:    [[CMP222:%.*]] = icmp sgt i32 [[M]], 0
+; CHECK-NEXT:    [[CMP222:%.*]] = icmp sgt i32 [[M:%.*]], 0
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M]] to i64
 ; CHECK-NEXT:    br i1 [[CMP222]], label [[INNER_FOR_BODY_SPLIT1:%.*]], label [[OUTER_CRIT_EDGE:%.*]]
 ; CHECK:       inner.preheader:
 ; CHECK-NEXT:    [[WIDE_TRIP_COUNT29:%.*]] = zext i32 [[N]] to i64
@@ -41,8 +41,9 @@ define void @lcssa_08(i32 %n, i32 %m) {
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT:    br label [[INNER_CRIT_EDGE:%.*]]
 ; CHECK:       inner.for.body.split:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT_LCSSA2:%.*]] = phi i64 [ [[WIDE_TRIP_COUNT]], [[OUTER_LATCH]] ]
 ; CHECK-NEXT:    [[TMP1]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[TMP1]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[TMP1]], [[WIDE_TRIP_COUNT_LCSSA2]]
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[INNER_FOR_BODY]], label [[OUTER_CRIT_EDGE]]
 ; CHECK:       inner.crit_edge:
 ; CHECK-NEXT:    br label [[OUTER_LATCH]]

diff  --git a/llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll b/llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll
new file mode 100644
index 000000000000..f50fbb0da8e3
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/pr45743-move-from-inner-preheader.ll
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -loop-interchange -S %s | FileCheck %s
+
+ at global = external local_unnamed_addr global [2 x [10 x i32]], align 16
+
+; We need to move %tmp4 from the inner loop preheader to the outer loop header
+; before interchanging.
+define void @test1() local_unnamed_addr #0 {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[INNER_PH:%.*]]
+; CHECK:       outer.header.preheader:
+; CHECK-NEXT:    br label [[OUTER_HEADER:%.*]]
+; CHECK:       outer.header:
+; CHECK-NEXT:    [[OUTER_IV:%.*]] = phi i64 [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 0, [[OUTER_HEADER_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[INNER_RED:%.*]] = phi i32 [ [[OUTER_RED:%.*]], [[OUTER_HEADER_PREHEADER]] ], [ [[RED_NEXT:%.*]], [[OUTER_LATCH]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i64 [[OUTER_IV]], 9
+; CHECK-NEXT:    br label [[INNER_SPLIT1:%.*]]
+; CHECK:       inner.ph:
+; CHECK-NEXT:    br label [[INNER:%.*]]
+; CHECK:       inner:
+; CHECK-NEXT:    [[INNER_IV:%.*]] = phi i64 [ 0, [[INNER_PH]] ], [ [[TMP0:%.*]], [[INNER_SPLIT:%.*]] ]
+; CHECK-NEXT:    [[OUTER_RED]] = phi i32 [ [[RED_NEXT_LCSSA:%.*]], [[INNER_SPLIT]] ], [ 0, [[INNER_PH]] ]
+; CHECK-NEXT:    br label [[OUTER_HEADER_PREHEADER]]
+; CHECK:       inner.split1:
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @global, i64 0, i64 [[INNER_IV]], i64 [[TMP4]]
+; CHECK-NEXT:    store i32 0, i32* [[PTR]], align 4
+; CHECK-NEXT:    [[RED_NEXT]] = or i32 [[INNER_RED]], 20
+; CHECK-NEXT:    [[INNER_IV_NEXT:%.*]] = add nsw i64 [[INNER_IV]], 1
+; CHECK-NEXT:    [[EC_1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 400
+; CHECK-NEXT:    br label [[OUTER_LATCH]]
+; CHECK:       inner.split:
+; CHECK-NEXT:    [[RED_NEXT_LCSSA]] = phi i32 [ [[RED_NEXT]], [[OUTER_LATCH]] ]
+; CHECK-NEXT:    [[TMP0]] = add nsw i64 [[INNER_IV]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 400
+; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT:%.*]], label [[INNER]]
+; CHECK:       outer.latch:
+; CHECK-NEXT:    [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], 1
+; CHECK-NEXT:    [[EC_2:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], 400
+; CHECK-NEXT:    br i1 [[EC_2]], label [[INNER_SPLIT]], label [[OUTER_HEADER]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+bb:
+  br label %outer.header
+
+outer.header:                                              ; preds = %bb11, %bb
+  %outer.iv = phi i64 [ 0, %bb ], [ %outer.iv.next, %outer.latch ]
+  %outer.red = phi i32 [ 0, %bb ], [ %red.next.lcssa, %outer.latch ]
+  br label %inner.ph
+
+inner.ph:                                              ; preds = %bb1
+  %tmp4 = add nsw i64 %outer.iv, 9
+  br label %inner
+
+inner:                                              ; preds = %bb5, %bb3
+  %inner.iv = phi i64 [ 0, %inner.ph ], [ %inner.iv.next, %inner ]
+  %inner.red = phi i32 [ %outer.red, %inner.ph ], [ %red.next, %inner ]
+  %ptr = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @global, i64 0, i64 %inner.iv, i64 %tmp4
+  store i32 0, i32* %ptr
+  %red.next = or i32 %inner.red, 20
+  %inner.iv.next = add nsw i64 %inner.iv, 1
+  %ec.1 = icmp eq i64 %inner.iv.next, 400
+  br i1 %ec.1, label %outer.latch, label %inner
+
+outer.latch:                                             ; preds = %bb5
+  %red.next.lcssa = phi i32 [ %red.next, %inner ]
+  %outer.iv.next = add nsw i64 %outer.iv, 1
+  %ec.2 = icmp eq i64 %outer.iv.next, 400
+  br i1 %ec.2, label %exit, label %outer.header
+
+exit:                                             ; preds = %bb11
+  ret void
+}
+
+declare void @side_effect()
+
+; Cannot interchange, as the inner loop preheader contains a call to a function
+; with side effects.
+
+define void @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[OUTER_HEADER:%.*]]
+; CHECK:       outer.header:
+; CHECK-NEXT:    [[OUTER_IV:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ]
+; CHECK-NEXT:    [[OUTER_RED:%.*]] = phi i32 [ 0, [[BB]] ], [ [[RED_NEXT_LCSSA:%.*]], [[OUTER_LATCH]] ]
+; CHECK-NEXT:    br label [[INNER_PH:%.*]]
+; CHECK:       inner.ph:
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i64 [[OUTER_IV]], 9
+; CHECK-NEXT:    call void @side_effect()
+; CHECK-NEXT:    br label [[INNER:%.*]]
+; CHECK:       inner:
+; CHECK-NEXT:    [[INNER_IV:%.*]] = phi i64 [ 0, [[INNER_PH]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ]
+; CHECK-NEXT:    [[INNER_RED:%.*]] = phi i32 [ [[OUTER_RED]], [[INNER_PH]] ], [ [[RED_NEXT:%.*]], [[INNER]] ]
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @global, i64 0, i64 [[INNER_IV]], i64 [[TMP4]]
+; CHECK-NEXT:    store i32 0, i32* [[PTR]], align 4
+; CHECK-NEXT:    [[RED_NEXT]] = or i32 [[INNER_RED]], 20
+; CHECK-NEXT:    [[INNER_IV_NEXT]] = add nsw i64 [[INNER_IV]], 1
+; CHECK-NEXT:    [[EC_1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 400
+; CHECK-NEXT:    br i1 [[EC_1]], label [[OUTER_LATCH]], label [[INNER]]
+; CHECK:       outer.latch:
+; CHECK-NEXT:    [[RED_NEXT_LCSSA]] = phi i32 [ [[RED_NEXT]], [[INNER]] ]
+; CHECK-NEXT:    [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], 1
+; CHECK-NEXT:    [[EC_2:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], 400
+; CHECK-NEXT:    br i1 [[EC_2]], label [[EXIT:%.*]], label [[OUTER_HEADER]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+bb:
+  br label %outer.header
+
+outer.header:                                              ; preds = %bb11, %bb
+  %outer.iv = phi i64 [ 0, %bb ], [ %outer.iv.next, %outer.latch ]
+  %outer.red = phi i32 [ 0, %bb ], [ %red.next.lcssa, %outer.latch ]
+  br label %inner.ph
+
+inner.ph:                                              ; preds = %bb1
+  %tmp4 = add nsw i64 %outer.iv, 9
+  call void @side_effect()
+  br label %inner
+
+inner:                                              ; preds = %bb5, %bb3
+  %inner.iv = phi i64 [ 0, %inner.ph ], [ %inner.iv.next, %inner ]
+  %inner.red = phi i32 [ %outer.red, %inner.ph ], [ %red.next, %inner ]
+  %ptr = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @global, i64 0, i64 %inner.iv, i64 %tmp4
+  store i32 0, i32* %ptr
+  %red.next = or i32 %inner.red, 20
+  %inner.iv.next = add nsw i64 %inner.iv, 1
+  %ec.1 = icmp eq i64 %inner.iv.next, 400
+  br i1 %ec.1, label %outer.latch, label %inner
+
+outer.latch:                                             ; preds = %bb5
+  %red.next.lcssa = phi i32 [ %red.next, %inner ]
+  %outer.iv.next = add nsw i64 %outer.iv, 1
+  %ec.2 = icmp eq i64 %outer.iv.next, 400
+  br i1 %ec.2, label %exit, label %outer.header
+
+exit:                                             ; preds = %bb11
+  ret void
+}


        


More information about the llvm-commits mailing list