[llvm] r371595 - [LoopInterchange] Properly move condition, induction increment and ops to latch.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 11 01:23:23 PDT 2019


Author: fhahn
Date: Wed Sep 11 01:23:23 2019
New Revision: 371595

URL: http://llvm.org/viewvc/llvm-project?rev=371595&view=rev
Log:
[LoopInterchange] Properly move condition, induction increment and ops to latch.

Currently we only rely on the induction increment to come before the
condition to ensure the required instructions get moved to the new
latch.

This patch duplicates and moves the required instructions to the
newly created latch. We move the condition to the end of the new block,
then process its operands. We stop at operands that are defined
outside the loop, or are the induction PHI.

We duplicate the instructions and update the uses in the moved
instructions, to ensure other users remain intact. See the added
test2 for such an example.

Reviewers: efriedma, mcrosier

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D67367

Added:
    llvm/trunk/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp
    llvm/trunk/test/Transforms/LoopInterchange/interchangeable.ll
    llvm/trunk/test/Transforms/LoopInterchange/perserve-lcssa.ll
    llvm/trunk/test/Transforms/LoopInterchange/phi-ordering.ll
    llvm/trunk/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll

Modified: llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp?rev=371595&r1=371594&r2=371595&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp Wed Sep 11 01:23:23 2019
@@ -410,7 +410,6 @@ public:
   void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
 
 private:
-  void splitInnerLoopLatch(Instruction *);
   void splitInnerLoopHeader();
   bool adjustLoopLinks();
   void adjustLoopPreheaders();
@@ -1226,7 +1225,7 @@ bool LoopInterchangeTransform::transform
 
   if (InnerLoop->getSubLoops().empty()) {
     BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
-    LLVM_DEBUG(dbgs() << "Calling Split Inner Loop\n");
+    LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
     PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
     if (!InductionPHI) {
       LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
@@ -1242,11 +1241,55 @@ bool LoopInterchangeTransform::transform
     if (&InductionPHI->getParent()->front() != InductionPHI)
       InductionPHI->moveBefore(&InductionPHI->getParent()->front());
 
-    // Split at the place were the induction variable is
-    // incremented/decremented.
-    // TODO: This splitting logic may not work always. Fix this.
-    splitInnerLoopLatch(InnerIndexVar);
-    LLVM_DEBUG(dbgs() << "splitInnerLoopLatch done\n");
+    // Create a new latch block for the inner loop. We split at the
+    // current latch's terminator and then move the condition and all
+    // operands that are not either loop-invariant or the induction PHI into the
+    // new latch block.
+    BasicBlock *NewLatch =
+        SplitBlock(InnerLoop->getLoopLatch(),
+                   InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
+
+    SmallSetVector<Instruction *, 4> WorkList;
+    unsigned i = 0;
+    auto MoveInstructions = [&i, &WorkList, this, InductionPHI, NewLatch]() {
+      for (; i < WorkList.size(); i++) {
+        // Duplicate instruction and move it the new latch. Update uses that
+        // have been moved.
+        Instruction *NewI = WorkList[i]->clone();
+        NewI->insertBefore(NewLatch->getFirstNonPHI());
+        assert(!NewI->mayHaveSideEffects() &&
+               "Moving instructions with side-effects may change behavior of "
+               "the loop nest!");
+        for (auto UI = WorkList[i]->use_begin(), UE = WorkList[i]->use_end();
+             UI != UE;) {
+          Use &U = *UI++;
+          Instruction *UserI = cast<Instruction>(U.getUser());
+          if (!InnerLoop->contains(UserI->getParent()) ||
+              UserI->getParent() == NewLatch || UserI == InductionPHI)
+            U.set(NewI);
+        }
+        // Add operands of moved instruction to the worklist, except if they are
+        // outside the inner loop or are the induction PHI.
+        for (Value *Op : WorkList[i]->operands()) {
+          Instruction *OpI = dyn_cast<Instruction>(Op);
+          if (!OpI ||
+              this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
+              OpI == InductionPHI)
+            continue;
+          WorkList.insert(OpI);
+        }
+      }
+    };
+
+    // FIXME: Should we interchange when we have a constant condition?
+    Instruction *CondI = dyn_cast<Instruction>(
+        cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
+            ->getCondition());
+    if (CondI)
+      WorkList.insert(CondI);
+    MoveInstructions();
+    WorkList.insert(cast<Instruction>(InnerIndexVar));
+    MoveInstructions();
 
     // Splits the inner loops phi nodes out into a separate basic block.
     BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
@@ -1263,10 +1306,6 @@ bool LoopInterchangeTransform::transform
   return true;
 }
 
-void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
-  SplitBlock(InnerLoop->getLoopLatch(), Inc, DT, LI);
-}
-
 /// \brief Move all instructions except the terminator from FromBB right before
 /// InsertBefore
 static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {

Modified: llvm/trunk/test/Transforms/LoopInterchange/interchangeable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopInterchange/interchangeable.ll?rev=371595&r1=371594&r2=371595&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopInterchange/interchangeable.ll (original)
+++ llvm/trunk/test/Transforms/LoopInterchange/interchangeable.ll Wed Sep 11 01:23:23 2019
@@ -18,26 +18,28 @@ define void @interchange_01(i64 %k, i64
 ; CHECK:       for1.header.preheader:
 ; CHECK-NEXT:    br label [[FOR1_HEADER:%.*]]
 ; CHECK:       for1.header:
-; CHECK-NEXT:    [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR1_INC10:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[J23:%.*]] = phi i64 [ [[J_NEXT24:%.*]], [[FOR1_INC10:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
 ; CHECK-NEXT:    br label [[FOR2_SPLIT1:%.*]]
 ; CHECK:       for2.preheader:
 ; CHECK-NEXT:    br label [[FOR2:%.*]]
 ; CHECK:       for2:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR2_SPLIT:%.*]] ], [ 0, [[FOR2_PREHEADER]] ]
+; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[TMP0:%.*]], [[FOR2_SPLIT:%.*]] ], [ 0, [[FOR2_PREHEADER]] ]
 ; CHECK-NEXT:    br label [[FOR1_HEADER_PREHEADER]]
 ; CHECK:       for2.split1:
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV23]]
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[J]], i64 [[J23]]
 ; CHECK-NEXT:    [[LV:%.*]] = load i64, i64* [[ARRAYIDX5]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[LV]], [[K:%.*]]
 ; CHECK-NEXT:    store i64 [[ADD]], i64* [[ARRAYIDX5]]
+; CHECK-NEXT:    [[J_NEXT:%.*]] = add nuw nsw i64 [[J]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[J]], 99
 ; CHECK-NEXT:    br label [[FOR1_INC10]]
 ; CHECK:       for2.split:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV]], 99
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END12:%.*]], label [[FOR2]]
+; CHECK-NEXT:    [[TMP0]] = add nuw nsw i64 [[J]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[J]], 99
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FOR_END12:%.*]], label [[FOR2]]
 ; CHECK:       for1.inc10:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT24]] = add nuw nsw i64 [[INDVARS_IV23]], 1
-; CHECK-NEXT:    [[EXITCOND26:%.*]] = icmp eq i64 [[INDVARS_IV23]], 99
+; CHECK-NEXT:    [[J_NEXT24]] = add nuw nsw i64 [[J23]], 1
+; CHECK-NEXT:    [[EXITCOND26:%.*]] = icmp eq i64 [[J23]], 99
 ; CHECK-NEXT:    br i1 [[EXITCOND26]], label [[FOR2_SPLIT]], label [[FOR1_HEADER]]
 ; CHECK:       for.end12:
 ; CHECK-NEXT:    ret void
@@ -79,26 +81,28 @@ define void @interchange_02(i64 %k) {
 ; CHECK:       for1.header.preheader:
 ; CHECK-NEXT:    br label [[FOR1_HEADER:%.*]]
 ; CHECK:       for1.header:
-; CHECK-NEXT:    [[INDVARS_IV19:%.*]] = phi i64 [ [[INDVARS_IV_NEXT20:%.*]], [[FOR1_INC10:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[J19:%.*]] = phi i64 [ [[J_NEXT20:%.*]], [[FOR1_INC10:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
 ; CHECK-NEXT:    br label [[FOR3_SPLIT1:%.*]]
 ; CHECK:       for3.preheader:
 ; CHECK-NEXT:    br label [[FOR3:%.*]]
 ; CHECK:       for3:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR3_SPLIT:%.*]] ], [ 100, [[FOR3_PREHEADER]] ]
+; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[TMP1:%.*]], [[FOR3_SPLIT:%.*]] ], [ 100, [[FOR3_PREHEADER]] ]
 ; CHECK-NEXT:    br label [[FOR1_HEADER_PREHEADER]]
 ; CHECK:       for3.split1:
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV19]]
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[J]], i64 [[J19]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX5]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP0]], [[K:%.*]]
 ; CHECK-NEXT:    store i64 [[ADD]], i64* [[ARRAYIDX5]]
+; CHECK-NEXT:    [[J_NEXT:%.*]] = add nsw i64 [[J]], -1
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i64 [[J]], 0
 ; CHECK-NEXT:    br label [[FOR1_INC10]]
 ; CHECK:       for3.split:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0
-; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR3]], label [[FOR_END11:%.*]]
+; CHECK-NEXT:    [[TMP1]] = add nsw i64 [[J]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i64 [[J]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR3]], label [[FOR_END11:%.*]]
 ; CHECK:       for1.inc10:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT20]] = add nuw nsw i64 [[INDVARS_IV19]], 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT20]], 100
+; CHECK-NEXT:    [[J_NEXT20]] = add nuw nsw i64 [[J19]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[J_NEXT20]], 100
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR3_SPLIT]], label [[FOR1_HEADER]]
 ; CHECK:       for.end11:
 ; CHECK-NEXT:    ret void
@@ -139,6 +143,28 @@ for.end11:
 ;; FIXME: DA misses this case after D35430
 
 define void @interchange_10() {
+; CHECK-LABEL: @interchange_10(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR1_HEADER:%.*]]
+; CHECK:       for1.header:
+; CHECK-NEXT:    [[J23:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[J_NEXT24:%.*]], [[FOR1_INC10:%.*]] ]
+; CHECK-NEXT:    [[J_NEXT24]] = add nuw nsw i64 [[J23]], 1
+; CHECK-NEXT:    br label [[FOR2:%.*]]
+; CHECK:       for2:
+; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR2]] ], [ 1, [[FOR1_HEADER]] ]
+; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J]], 1
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[J]], i64 [[J23]]
+; CHECK-NEXT:    store i64 [[J]], i64* [[ARRAYIDX5]]
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[J]], i64 [[J_NEXT24]]
+; CHECK-NEXT:    store i64 [[J23]], i64* [[ARRAYIDX10]]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[J]], 99
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR1_INC10]], label [[FOR2]]
+; CHECK:       for1.inc10:
+; CHECK-NEXT:    [[EXITCOND26:%.*]] = icmp eq i64 [[J23]], 98
+; CHECK-NEXT:    br i1 [[EXITCOND26]], label [[FOR_END12:%.*]], label [[FOR1_HEADER]]
+; CHECK:       for.end12:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %for1.header
 

Modified: llvm/trunk/test/Transforms/LoopInterchange/perserve-lcssa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopInterchange/perserve-lcssa.ll?rev=371595&r1=371594&r2=371595&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopInterchange/perserve-lcssa.ll (original)
+++ llvm/trunk/test/Transforms/LoopInterchange/perserve-lcssa.ll Wed Sep 11 01:23:23 2019
@@ -10,13 +10,18 @@
 
 define void @test_lcssa_indvars1()  {
 ; CHECK-LABEL: @test_lcssa_indvars1()
+; CHECK-LABEL: inner.body:
+; CHECK-NEXT:    %iv.inner = phi i64 [ %[[IVNEXT:[0-9]+]], %inner.body.split ], [ 5, %inner.body.preheader ]
+
 ; CHECK-LABEL: inner.body.split:
 ; CHECK-NEXT:    %0 = phi i64 [ %iv.outer.next, %outer.latch ]
-; CHECK-NEXT:    %iv.inner.next = add nsw i64 %iv.inner, -1
+; CHECK-NEXT:    %[[IVNEXT]] = add nsw i64 %iv.inner, -1
+; CHECK-NEXT:    %[[COND:[0-9]+]] = icmp eq i64 %iv.inner, 0
+; CHECK-NEXT:    br i1 %[[COND]], label %exit, label %inner.body
 
 ; CHECK-LABEL: exit:
 ; CHECK-NEXT:    %v4.lcssa = phi i64 [ %0, %inner.body.split ]
-; CHECK-NEXT:    %v8.lcssa.lcssa = phi i64 [ %iv.inner.next, %inner.body.split ]
+; CHECK-NEXT:    %v8.lcssa.lcssa = phi i64 [ %[[IVNEXT]], %inner.body.split ]
 ; CHECK-NEXT:    store i64 %v8.lcssa.lcssa, i64* @b, align 4
 ; CHECK-NEXT:    store i64 %v4.lcssa, i64* @a, align 4
 
@@ -52,9 +57,14 @@ exit:
 
 define void @test_lcssa_indvars2()  {
 ; CHECK-LABEL: @test_lcssa_indvars2()
+; CHECK-LABEL: inner.body:
+; CHECK-NEXT:    %iv.inner = phi i64 [ %[[IVNEXT:[0-9]+]], %inner.body.split ], [ 5, %inner.body.preheader ]
+
 ; CHECK-LABEL: inner.body.split:
 ; CHECK-NEXT:    %0 = phi i64 [ %iv.outer, %outer.latch ]
-; CHECK-NEXT:    %iv.inner.next = add nsw i64 %iv.inner, -1
+; CHECK-NEXT:    %[[IVNEXT]] = add nsw i64 %iv.inner, -1
+; CHECK-NEXT:    %[[COND:[0-9]+]] = icmp eq i64 %[[IVNEXT]], 0
+; CHECK-NEXT:    br i1 %[[COND]], label %exit, label %inner.body
 
 ; CHECK-LABEL: exit:
 ; CHECK-NEXT:    %v4.lcssa = phi i64 [ %0, %inner.body.split ]
@@ -93,14 +103,19 @@ exit:
 
 define void @test_lcssa_indvars3()  {
 ; CHECK-LABEL: @test_lcssa_indvars3()
+; CHECK-LABEL: inner.body:
+; CHECK-NEXT:    %iv.inner = phi i64 [ %[[IVNEXT:[0-9]+]], %inner.body.split ], [ 5, %inner.body.preheader ]
+
 ; CHECK-LABEL: inner.body.split:
 ; CHECK-NEXT:    %0 = phi i64 [ %iv.outer.next, %outer.latch ]
-; CHECK-NEXT:    %iv.inner.next = add nsw i64 %iv.inner, -1
+; CHECK-NEXT:    %[[IVNEXT]] = add nsw i64 %iv.inner, -1
+; CHECK-NEXT:    %[[COND:[0-9]+]] = icmp eq i64 %iv.inner, 0
+; CHECK-NEXT:    br i1 %[[COND]], label %exit, label %inner.body
 
 ; CHECK-LABEL: exit:
 ; CHECK-NEXT:    %v4.lcssa = phi i64 [ %0, %inner.body.split ]
-; CHECK-NEXT:    %v8.lcssa.lcssa = phi i64 [ %iv.inner.next, %inner.body.split ]
-; CHECK-NEXT:    %v8.lcssa.lcssa.2 = phi i64 [ %iv.inner.next, %inner.body.split ]
+; CHECK-NEXT:    %v8.lcssa.lcssa = phi i64 [ %[[IVNEXT]], %inner.body.split ]
+; CHECK-NEXT:    %v8.lcssa.lcssa.2 = phi i64 [ %[[IVNEXT]], %inner.body.split ]
 ; CHECK-NEXT:    %r1 = add i64 %v8.lcssa.lcssa, %v8.lcssa.lcssa.2
 ; CHECK-NEXT:    store i64 %r1, i64* @b, align 4
 ; CHECK-NEXT:    store i64 %v4.lcssa, i64* @a, align 4
@@ -150,8 +165,12 @@ define void @no_reachable_exits() {
 ; CHECK-LABEL: inner.ph:
 ; CHECK-NEXT:    br label %inner.body
 ; CHECK-LABEL: inner.body:
-; CHECK-NEXT:    %tmp31 = phi i32 [ 0, %inner.ph ], [ %tmp6, %inner.body.split ]
+; CHECK-NEXT:    %tmp31 = phi i32 [ 0, %inner.ph ], [ %[[IVNEXT:[0-9]]], %inner.body.split ]
 ; CHECK-NEXT:    br label %outer.ph
+; CHECK-LABEL: inner.body.split:
+; CHECK-NEXT:    %[[IVNEXT]] = add nsw i32 %tmp31, 1
+; CHECK-NEXT:    br i1 false, label %inner.body, label %exit
+
 
 bb:
   br label %outer.ph

Modified: llvm/trunk/test/Transforms/LoopInterchange/phi-ordering.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopInterchange/phi-ordering.ll?rev=371595&r1=371594&r2=371595&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopInterchange/phi-ordering.ll (original)
+++ llvm/trunk/test/Transforms/LoopInterchange/phi-ordering.ll Wed Sep 11 01:23:23 2019
@@ -27,7 +27,7 @@ define void @test(i32 %T, [90 x i32]* no
 ; CHECK:       for3.preheader:
 ; CHECK-NEXT:    br label [[FOR3:%.*]]
 ; CHECK:       for3:
-; CHECK-NEXT:    [[K:%.*]] = phi i32 [ [[INC:%.*]], [[FOR3_SPLIT:%.*]] ], [ 1, [[FOR3_PREHEADER]] ]
+; CHECK-NEXT:    [[K:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR3_SPLIT:%.*]] ], [ 1, [[FOR3_PREHEADER]] ]
 ; CHECK-NEXT:    br label [[FOR1_HEADER_PREHEADER]]
 ; CHECK:       for3.split1:
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[K]], [[MUL]]
@@ -35,11 +35,13 @@ define void @test(i32 %T, [90 x i32]* no
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2
 ; CHECK-NEXT:    [[ADD15:%.*]] = add nsw i16 [[TMP0]], 1
 ; CHECK-NEXT:    store i16 [[ADD15]], i16* [[ARRAYIDX]]
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 90
 ; CHECK-NEXT:    br label [[FOR2_INC16]]
 ; CHECK:       for3.split:
-; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[K]], 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 90
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR3]]
+; CHECK-NEXT:    [[TMP1]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 90
+; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR3]]
 ; CHECK:       for2.inc16:
 ; CHECK-NEXT:    [[INC17]] = add nuw nsw i32 [[J]], 1
 ; CHECK-NEXT:    [[EXITCOND47:%.*]] = icmp eq i32 [[INC17]], 90

Added: llvm/trunk/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll?rev=371595&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll (added)
+++ llvm/trunk/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll Wed Sep 11 01:23:23 2019
@@ -0,0 +1,140 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -loop-interchange -verify-loop-lcssa -verify-dom-info -S %s | FileCheck %s
+
+@b = external dso_local global [5 x i32], align 16
+
+define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY2_PREHEADER:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INC41:%.*]] = phi i32 [ [[INC4:%.*]], [[FOR_INC3:%.*]] ], [ undef, [[FOR_BODY_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INC41]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    br label [[FOR_BODY2_SPLIT:%.*]]
+; CHECK:       for.body2.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY2:%.*]]
+; CHECK:       for.body2:
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_INC_SPLIT:%.*]] ], [ 1, [[FOR_BODY2_PREHEADER]] ]
+; CHECK-NEXT:    br label [[FOR_BODY_PREHEADER]]
+; CHECK:       for.body2.split:
+; CHECK-NEXT:    br label [[FOR_INC:%.*]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    store i32 undef, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[LSR_IV]], 4
+; CHECK-NEXT:    [[LSR_IV_NEXT:%.*]] = add nuw nsw i32 [[LSR_IV]], 1
+; CHECK-NEXT:    br label [[FOR_COND1_FOR_END_CRIT_EDGE:%.*]]
+; CHECK:       for.inc.split:
+; CHECK-NEXT:    [[TMP1]] = add nuw nsw i32 [[LSR_IV]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[LSR_IV]], 4
+; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR_BODY2]], label [[FOR_COND_FOR_END5_CRIT_EDGE:%.*]]
+; CHECK:       for.cond1.for.end_crit_edge:
+; CHECK-NEXT:    br label [[FOR_INC3]]
+; CHECK:       for.inc3:
+; CHECK-NEXT:    [[INC4]] = add nsw i32 [[INC41]], 1
+; CHECK-NEXT:    br i1 false, label [[FOR_BODY]], label [[FOR_INC_SPLIT]]
+; CHECK:       for.cond.for.end5_crit_edge:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc3, %entry
+  %inc41 = phi i32 [ %inc4, %for.inc3 ], [ undef, %entry ]
+  br label %for.body2
+
+for.body2:                                        ; preds = %for.inc, %for.body
+  %lsr.iv = phi i32 [ %lsr.iv.next, %for.inc ], [ 1, %for.body ]
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body2
+  %idxprom = sext i32 %inc41 to i64
+  %arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  store i32 undef, i32* %arrayidx, align 4
+  %cmp = icmp slt i32 %lsr.iv, 4
+  %lsr.iv.next = add nuw nsw i32 %lsr.iv, 1
+  br i1 %cmp, label %for.body2, label %for.cond1.for.end_crit_edge
+
+for.cond1.for.end_crit_edge:                      ; preds = %for.inc
+  br label %for.inc3
+
+for.inc3:                                         ; preds = %for.cond1.for.end_crit_edge
+  %inc4 = add nsw i32 %inc41, 1
+  br i1 undef, label %for.body, label %for.cond.for.end5_crit_edge
+
+for.cond.for.end5_crit_edge:                      ; preds = %for.inc3
+  ret void
+}
+
+define void @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY2_PREHEADER:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INC41:%.*]] = phi i32 [ [[INC4:%.*]], [[FOR_INC3:%.*]] ], [ undef, [[FOR_BODY_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INC41]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    br label [[FOR_BODY2_SPLIT:%.*]]
+; CHECK:       for.body2.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY2:%.*]]
+; CHECK:       for.body2:
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_INC_SPLIT:%.*]] ], [ 1, [[FOR_BODY2_PREHEADER]] ]
+; CHECK-NEXT:    br label [[FOR_BODY_PREHEADER]]
+; CHECK:       for.body2.split:
+; CHECK-NEXT:    br label [[FOR_INC:%.*]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[LSR_IV]], 4
+; CHECK-NEXT:    [[CMP_ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT:    store i32 [[CMP_ZEXT]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[LSR_IV_NEXT:%.*]] = add nuw nsw i32 [[LSR_IV]], 1
+; CHECK-NEXT:    br label [[FOR_COND1_FOR_END_CRIT_EDGE:%.*]]
+; CHECK:       for.inc.split:
+; CHECK-NEXT:    [[TMP1]] = add nuw nsw i32 [[LSR_IV]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[LSR_IV]], 4
+; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR_BODY2]], label [[FOR_COND_FOR_END5_CRIT_EDGE:%.*]]
+; CHECK:       for.cond1.for.end_crit_edge:
+; CHECK-NEXT:    br label [[FOR_INC3]]
+; CHECK:       for.inc3:
+; CHECK-NEXT:    [[INC4]] = add nsw i32 [[INC41]], 1
+; CHECK-NEXT:    br i1 false, label [[FOR_BODY]], label [[FOR_INC_SPLIT]]
+; CHECK:       for.cond.for.end5_crit_edge:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc3, %entry
+  %inc41 = phi i32 [ %inc4, %for.inc3 ], [ undef, %entry ]
+  br label %for.body2
+
+for.body2:                                        ; preds = %for.inc, %for.body
+  %lsr.iv = phi i32 [ %lsr.iv.next, %for.inc ], [ 1, %for.body ]
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body2
+  %idxprom = sext i32 %inc41 to i64
+  %arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %cmp = icmp slt i32 %lsr.iv, 4
+  %cmp.zext = zext i1 %cmp to i32
+  store i32 %cmp.zext, i32* %arrayidx, align 4
+  %lsr.iv.next = add nuw nsw i32 %lsr.iv, 1
+  br i1 %cmp, label %for.body2, label %for.cond1.for.end_crit_edge
+
+for.cond1.for.end_crit_edge:                      ; preds = %for.inc
+  br label %for.inc3
+
+for.inc3:                                         ; preds = %for.cond1.for.end_crit_edge
+  %inc4 = add nsw i32 %inc41, 1
+  br i1 undef, label %for.body, label %for.cond.for.end5_crit_edge
+
+for.cond.for.end5_crit_edge:                      ; preds = %for.inc3
+  ret void
+}

Modified: llvm/trunk/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll?rev=371595&r1=371594&r2=371595&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll (original)
+++ llvm/trunk/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll Wed Sep 11 01:23:23 2019
@@ -31,6 +31,8 @@ define i64 @test1([100 x [100 x i64]]* %
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* [[ARR:%.*]], i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV23]]
 ; CHECK-NEXT:    [[LV:%.*]] = load i64, i64* [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[SUM_INC]] = add i64 [[SUM_INNER]], [[LV]]
+; CHECK-NEXT:    [[IV_ORIGINAL:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXIT1_ORIGINAL:%.*]] = icmp eq i64 [[IV_ORIGINAL]], 100
 ; CHECK-NEXT:    br label [[FOR1_INC]]
 ; CHECK:       for2.split:
 ; CHECK-NEXT:    [[SUM_INC_LCSSA]] = phi i64 [ [[SUM_INC]], %for1.inc ]




More information about the llvm-commits mailing list