[llvm] fa82a3d - [runtimeunroll] Support epilogue unrolling with a parent loop

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 2 16:53:45 PDT 2021


Author: Philip Reames
Date: 2021-09-02T16:29:20-07:00
New Revision: fa82a3d016c9fb82c1b81834fa18cd4c797735a0

URL: https://github.com/llvm/llvm-project/commit/fa82a3d016c9fb82c1b81834fa18cd4c797735a0
DIFF: https://github.com/llvm/llvm-project/commit/fa82a3d016c9fb82c1b81834fa18cd4c797735a0.diff

LOG: [runtimeunroll] Support epilogue unrolling with a parent loop

This patch adds support for runtime unrolling of inner loops using an epilogue remainder loop. The basic issue is that the original latch exit block of the inner loop could be outside the outer loop. When we clone the inner loop and split the latch exit, the cloned blocks need to be in the outer loop.

Differential Revision: https://reviews.llvm.org/D108476

Added: 
    

Modified: 
    llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
    llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 105c628fe0cfa..be062b46dc469 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -423,14 +423,6 @@ static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit,
   if (!PreserveLCSSA)
     return false;
 
-  // FIXME: We bail out of multi-exit unrolling when epilog loop is generated
-  // and L is an inner loop. This is because in presence of multiple exits, the
-  // outer loop is incorrect: we do not add the EpilogPreheader and exit to the
-  // outer loop. This is automatically handled in the prolog case, so we do not
-  // have that bug in prolog generation.
-  if (UseEpilogRemainder && L->getParentLoop())
-    return false;
-
   // All constraints have been satisfied.
   return true;
 }
@@ -456,6 +448,11 @@ static bool canProfitablyUnrollMultiExitLoop(
   if (!LatchExit->getSinglePredecessor())
     return false;
 
+  // TODO: We used to bail out for correctness (now fixed).  Under what
+  // circumstances is this case profitable to allow?
+  if (UseEpilogRemainder && L->getParentLoop())
+    return false;
+
   // The main pain point with multi-exit loop unrolling is that once unrolled,
   // we will not be able to merge all blocks into a straight line code.
   // There are branches within the unrolled loop that go to the OtherExits.
@@ -729,6 +726,21 @@ bool llvm::UnrollRuntimeLoopRemainder(
     // Split NewExit to insert epilog remainder loop.
     EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
     EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
+
+    // If the latch exits from multiple levels of nested loops, then
+    // by assumption there must be another loop exit which branches to the
+    // outer loop and we must adjust the loop for the newly inserted blocks
+    // to account for the fact that our epilogue is still in the same outer
+    // loop. Note that this leaves loopinfo temporarily out of sync with the
+    // CFG until the actual epilogue loop is inserted.
+    if (auto *ParentL = L->getParentLoop())
+      if (LI->getLoopFor(LatchExit) != ParentL) {
+        LI->removeBlock(NewExit);
+        ParentL->addBasicBlockToLoop(NewExit, *LI);
+        LI->removeBlock(EpilogPreHeader);
+        ParentL->addBasicBlockToLoop(EpilogPreHeader, *LI);
+      }
+
   } else {
     // If prolog remainder
     // Split the original preheader twice to insert prolog remainder loop

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
index 73bf94cd7982a..2d961aad6a5c4 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
@@ -4271,74 +4271,162 @@ loopexit1:                                             ; preds = %header
   ret i32 %sext3
 }
 
-; Nested loop and inner loop is unrolled
-; FIXME: we cannot unroll with epilog remainder currently, because
-; the outer loop does not contain the epilog preheader and epilog exit (while
-; infact it should). This causes us to choke up on LCSSA form being incorrect in
-; outer loop. However, the exit block where LCSSA fails, is infact still within
-; the outer loop. For now, we just bail out in presence of outer loop and epilog
-; loop is generated.
-; The outer loop header is the preheader for the inner loop and the inner header
-; branches back to the outer loop.
+; Nested loop and inner loop is unrolled.  The tricky bit here is that
+; several blocks in the inner loop fall out of the outer block when we
+; use the epilogue block remainder strategy.
 define void @test8() {
 ; EPILOG-LABEL: @test8(
 ; EPILOG-NEXT:  bb:
 ; EPILOG-NEXT:    br label %outerloop
+; EPILOG:       outerloop.loopexit.loopexit:
+; EPILOG-NEXT:    br label %outerloop.loopexit
+; EPILOG:       outerloop.loopexit.loopexit1:
+; EPILOG-NEXT:    br label %outerloop.loopexit
 ; EPILOG:       outerloop.loopexit:
 ; EPILOG-NEXT:    br label %outerloop
 ; EPILOG:       outerloop:
 ; EPILOG-NEXT:    %i = phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit ]
+; EPILOG-NEXT:    %0 = sub i64 100, %i
+; EPILOG-NEXT:    %1 = sub i64 99, %i
+; EPILOG-NEXT:    %xtraiter = and i64 %0, 7
+; EPILOG-NEXT:    %2 = icmp ult i64 %1, 7
+; EPILOG-NEXT:    br i1 %2, label %exit.unr-lcssa, label %outerloop.new
+; EPILOG:       outerloop.new:
+; EPILOG-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-NEXT:    br label %innerH
 ; EPILOG:       innerH:
-; EPILOG-NEXT:    %i3 = phi i64 [ %i4, %latch ], [ %i, %outerloop ]
+; EPILOG-NEXT:    %i3 = phi i64 [ %i, %outerloop.new ], [ %i4.7, %latch.7 ]
+; EPILOG-NEXT:    %niter = phi i64 [ %unroll_iter, %outerloop.new ], [ %niter.nsub.7, %latch.7 ]
 ; EPILOG-NEXT:    %i4 = add nuw nsw i64 %i3, 1
-; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit, label %latch
+; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch
 ; EPILOG:       latch:
-; EPILOG-NEXT:    %i6 = icmp ult i64 %i4, 100
-; EPILOG-NEXT:    br i1 %i6, label %innerH, label %exit
+; EPILOG-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-NEXT:    %i4.1 = add nuw nsw i64 %i4, 1
+; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.1
+; EPILOG:       exit.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %i3.unr.ph = phi i64 [ %i4.7, %latch.7 ]
+; EPILOG-NEXT:    br label %exit.unr-lcssa
+; EPILOG:       exit.unr-lcssa:
+; EPILOG-NEXT:    %i3.unr = phi i64 [ %i, %outerloop ], [ %i3.unr.ph, %exit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %innerH.epil.preheader, label %exit.loopexit
+; EPILOG:       innerH.epil.preheader:
+; EPILOG-NEXT:    br label %innerH.epil
+; EPILOG:       innerH.epil:
+; EPILOG-NEXT:    %i3.epil = phi i64 [ %i4.epil, %latch.epil ], [ %i3.unr, %innerH.epil.preheader ]
+; EPILOG-NEXT:    %epil.iter = phi i64 [ %xtraiter, %innerH.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    %i4.epil = add nuw nsw i64 %i3.epil, 1
+; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit1, label %latch.epil
+; EPILOG:       latch.epil:
+; EPILOG-NEXT:    %i6.epil = icmp ult i64 %i4.epil, 100
+; EPILOG-NEXT:    %epil.iter.sub = sub i64 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %innerH.epil, label %exit.epilog-lcssa, !llvm.loop !11
+; EPILOG:       exit.epilog-lcssa:
+; EPILOG-NEXT:    br label %exit
+; EPILOG:       exit.loopexit:
+; EPILOG-NEXT:    br label %exit
 ; EPILOG:       exit:
 ; EPILOG-NEXT:    ret void
+; EPILOG:       latch.1:
+; EPILOG-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-NEXT:    %i4.2 = add nuw nsw i64 %i4.1, 1
+; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.2
+; EPILOG:       latch.2:
+; EPILOG-NEXT:    %niter.nsub.2 = sub i64 %niter.nsub.1, 1
+; EPILOG-NEXT:    %i4.3 = add nuw nsw i64 %i4.2, 1
+; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.3
+; EPILOG:       latch.3:
+; EPILOG-NEXT:    %niter.nsub.3 = sub i64 %niter.nsub.2, 1
+; EPILOG-NEXT:    %i4.4 = add nuw nsw i64 %i4.3, 1
+; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.4
+; EPILOG:       latch.4:
+; EPILOG-NEXT:    %niter.nsub.4 = sub i64 %niter.nsub.3, 1
+; EPILOG-NEXT:    %i4.5 = add nuw nsw i64 %i4.4, 1
+; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.5
+; EPILOG:       latch.5:
+; EPILOG-NEXT:    %niter.nsub.5 = sub i64 %niter.nsub.4, 1
+; EPILOG-NEXT:    %i4.6 = add nuw nsw i64 %i4.5, 1
+; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.6
+; EPILOG:       latch.6:
+; EPILOG-NEXT:    %niter.nsub.6 = sub i64 %niter.nsub.5, 1
+; EPILOG-NEXT:    %i4.7 = add nuw nsw i64 %i4.6, 1
+; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.7
+; EPILOG:       latch.7:
+; EPILOG-NEXT:    %niter.nsub.7 = sub i64 %niter.nsub.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.nsub.7, 0
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %innerH, label %exit.unr-lcssa.loopexit
 ;
 ; EPILOG-BLOCK-LABEL: @test8(
 ; EPILOG-BLOCK-NEXT:  bb:
 ; EPILOG-BLOCK-NEXT:    br label %outerloop
+; EPILOG-BLOCK:       outerloop.loopexit.loopexit:
+; EPILOG-BLOCK-NEXT:    br label %outerloop.loopexit
 ; EPILOG-BLOCK:       outerloop.loopexit:
-; EPILOG-BLOCK-NEXT:    br label %innerH.12
+; EPILOG-BLOCK-NEXT:    br i1 false, label %exit.unr-lcssa.1, label %outerloop.new.1
 ; EPILOG-BLOCK:       outerloop:
 ; EPILOG-BLOCK-NEXT:    %i = phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit.1 ]
+; EPILOG-BLOCK-NEXT:    %0 = sub i64 100, %i
+; EPILOG-BLOCK-NEXT:    %1 = sub i64 99, %i
+; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
+; EPILOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %exit.unr-lcssa, label %outerloop.new
+; EPILOG-BLOCK:       outerloop.new:
+; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %innerH
 ; EPILOG-BLOCK:       innerH:
-; EPILOG-BLOCK-NEXT:    %i3 = phi i64 [ %i, %outerloop ], [ %i4.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %i3 = phi i64 [ %i, %outerloop.new ], [ %i4.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i64 [ %unroll_iter, %outerloop.new ], [ %niter.nsub.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %i4 = add nuw nsw i64 %i3, 1
-; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit, label %latch
+; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch
 ; EPILOG-BLOCK:       latch:
-; EPILOG-BLOCK-NEXT:    %i6 = icmp ult i64 %i4, 100
-; EPILOG-BLOCK-NEXT:    br i1 %i6, label %innerH.1, label %exit.loopexit
-; EPILOG-BLOCK:       exit.loopexit:
+; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i64 %niter, 1
+; EPILOG-BLOCK-NEXT:    %i4.1 = add nuw nsw i64 %i4, 1
+; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit, label %latch.1
+; EPILOG-BLOCK:       exit.unr-lcssa.loopexit:
+; EPILOG-BLOCK-NEXT:    br label %exit.unr-lcssa
+; EPILOG-BLOCK:       exit.unr-lcssa:
+; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %innerH.epil.preheader, label %exit.loopexit
+; EPILOG-BLOCK:       innerH.epil.preheader:
+; EPILOG-BLOCK-NEXT:    br label %innerH.epil
+; EPILOG-BLOCK:       innerH.epil:
+; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit, label %latch.epil
+; EPILOG-BLOCK:       latch.epil:
 ; EPILOG-BLOCK-NEXT:    br label %exit
-; EPILOG-BLOCK:       exit.loopexit5:
+; EPILOG-BLOCK:       exit.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit
 ; EPILOG-BLOCK:       exit:
 ; EPILOG-BLOCK-NEXT:    ret void
-; EPILOG-BLOCK:       innerH.1:
-; EPILOG-BLOCK-NEXT:    %i4.1 = add nuw nsw i64 %i4, 1
-; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit, label %latch.1
 ; EPILOG-BLOCK:       latch.1:
-; EPILOG-BLOCK-NEXT:    %i6.1 = icmp ult i64 %i4.1, 100
-; EPILOG-BLOCK-NEXT:    br i1 %i6.1, label %innerH, label %exit.loopexit, !llvm.loop !11
-; EPILOG-BLOCK:       innerH.12:
-; EPILOG-BLOCK-NEXT:    %i3.1 = phi i64 [ 0, %outerloop.loopexit ], [ %i4.1.1, %latch.1.1 ]
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i64 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %innerH, label %exit.unr-lcssa.loopexit, !llvm.loop !11
+; EPILOG-BLOCK:       outerloop.new.1:
+; EPILOG-BLOCK-NEXT:    br label %innerH.1
+; EPILOG-BLOCK:       innerH.1:
+; EPILOG-BLOCK-NEXT:    %i3.1 = phi i64 [ 0, %outerloop.new.1 ], [ %i4.1.1, %latch.1.1 ]
+; EPILOG-BLOCK-NEXT:    %niter.1 = phi i64 [ 100, %outerloop.new.1 ], [ %niter.nsub.1.1, %latch.1.1 ]
 ; EPILOG-BLOCK-NEXT:    %i4.11 = add nuw nsw i64 %i3.1, 1
-; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.1, label %latch.14
-; EPILOG-BLOCK:       latch.14:
-; EPILOG-BLOCK-NEXT:    %i6.13 = icmp ult i64 %i4.11, 100
-; EPILOG-BLOCK-NEXT:    br i1 %i6.13, label %innerH.1.1, label %exit.loopexit5
-; EPILOG-BLOCK:       innerH.1.1:
+; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.13
+; EPILOG-BLOCK:       latch.13:
+; EPILOG-BLOCK-NEXT:    %niter.nsub.12 = sub i64 %niter.1, 1
 ; EPILOG-BLOCK-NEXT:    %i4.1.1 = add nuw nsw i64 %i4.11, 1
-; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.1, label %latch.1.1
+; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.1.1
 ; EPILOG-BLOCK:       latch.1.1:
-; EPILOG-BLOCK-NEXT:    %i6.1.1 = icmp ult i64 %i4.1.1, 100
-; EPILOG-BLOCK-NEXT:    br i1 %i6.1.1, label %innerH.12, label %exit.loopexit5, !llvm.loop !11
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1.1 = sub i64 %niter.nsub.12, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1.1 = icmp ne i64 %niter.nsub.1.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1.1, label %innerH.1, label %exit.unr-lcssa.loopexit.1, !llvm.loop !11
+; EPILOG-BLOCK:       exit.unr-lcssa.loopexit.1:
+; EPILOG-BLOCK-NEXT:    br label %exit.unr-lcssa.1
+; EPILOG-BLOCK:       outerloop.loopexit.loopexit.1:
+; EPILOG-BLOCK-NEXT:    br label %outerloop.loopexit.1
+; EPILOG-BLOCK:       exit.unr-lcssa.1:
+; EPILOG-BLOCK-NEXT:    br i1 false, label %innerH.epil.preheader.1, label %exit.loopexit
+; EPILOG-BLOCK:       innerH.epil.preheader.1:
+; EPILOG-BLOCK-NEXT:    br label %innerH.epil.1
+; EPILOG-BLOCK:       innerH.epil.1:
+; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.1, label %latch.epil
 ; EPILOG-BLOCK:       outerloop.loopexit.1:
 ; EPILOG-BLOCK-NEXT:    br label %outerloop, !llvm.loop !12
 ;
@@ -4515,6 +4603,8 @@ declare i8 addrspace(1)* @foo(i32)
 define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; EPILOG-LABEL: @test9(
 ; EPILOG-NEXT:  bb:
+; EPILOG-NEXT:    %0 = add i32 %n, -1
+; EPILOG-NEXT:    %1 = add i32 %n, -2
 ; EPILOG-NEXT:    br label %outerloopHdr
 ; EPILOG:       outerloopHdr:
 ; EPILOG-NEXT:    %trip = add i32 %n, -1
@@ -4522,86 +4612,180 @@ define i8 addrspace(1)* @test9(i8* nocapture readonly %arg, i32 %n) {
 ; EPILOG-NEXT:    br i1 %outercnd, label %preheader, label %outerLatch
 ; EPILOG:       preheader:
 ; EPILOG-NEXT:    %i4 = zext i32 0 to i64
+; EPILOG-NEXT:    %xtraiter = and i32 %0, 7
+; EPILOG-NEXT:    %2 = icmp ult i32 %1, 7
+; EPILOG-NEXT:    br i1 %2, label %outerLatch.loopexit.unr-lcssa, label %preheader.new
+; EPILOG:       preheader.new:
+; EPILOG-NEXT:    %unroll_iter = sub i32 %0, %xtraiter
 ; EPILOG-NEXT:    br label %header
 ; EPILOG:       header:
-; EPILOG-NEXT:    %phi = phi i64 [ %i4, %preheader ], [ %iv.next, %latch ]
-; EPILOG-NEXT:    %i7 = trunc i64 %phi to i32
-; EPILOG-NEXT:    br i1 true, label %latch, label %innerexit
+; EPILOG-NEXT:    %phi = phi i64 [ %i4, %preheader.new ], [ %iv.next.7, %latch.7 ]
+; EPILOG-NEXT:    %niter = phi i32 [ %unroll_iter, %preheader.new ], [ %niter.nsub.7, %latch.7 ]
+; EPILOG-NEXT:    br i1 true, label %latch, label %innerexit.loopexit
+; EPILOG:       innerexit.loopexit:
+; EPILOG-NEXT:    %trip.lcssa.ph = phi i32 [ %trip, %header ], [ %trip, %latch ], [ %trip, %latch.1 ], [ %trip, %latch.2 ], [ %trip, %latch.3 ], [ %trip, %latch.4 ], [ %trip, %latch.5 ], [ %trip, %latch.6 ]
+; EPILOG-NEXT:    br label %innerexit
+; EPILOG:       innerexit.loopexit1:
+; EPILOG-NEXT:    %trip.lcssa.ph2 = phi i32 [ %trip, %header.epil ]
+; EPILOG-NEXT:    br label %innerexit
 ; EPILOG:       innerexit:
-; EPILOG-NEXT:    %trip.lcssa = phi i32 [ %trip, %header ]
+; EPILOG-NEXT:    %trip.lcssa = phi i32 [ %trip.lcssa.ph, %innerexit.loopexit ], [ %trip.lcssa.ph2, %innerexit.loopexit1 ]
 ; EPILOG-NEXT:    %i9 = call i8 addrspace(1)* @foo(i32 %trip.lcssa)
 ; EPILOG-NEXT:    ret i8 addrspace(1)* %i9
 ; EPILOG:       latch:
-; EPILOG-NEXT:    %i11 = add nsw i32 %i7, 1
-; EPILOG-NEXT:    %innercnd = icmp slt i32 %i11, %trip
 ; EPILOG-NEXT:    %iv.next = add nuw nsw i64 %phi, 1
-; EPILOG-NEXT:    br i1 %innercnd, label %header, label %outerLatch.loopexit
+; EPILOG-NEXT:    %niter.nsub = sub i32 %niter, 1
+; EPILOG-NEXT:    br i1 true, label %latch.1, label %innerexit.loopexit
+; EPILOG:       outerLatch.loopexit.unr-lcssa.loopexit:
+; EPILOG-NEXT:    %phi.unr.ph = phi i64 [ %iv.next.7, %latch.7 ]
+; EPILOG-NEXT:    br label %outerLatch.loopexit.unr-lcssa
+; EPILOG:       outerLatch.loopexit.unr-lcssa:
+; EPILOG-NEXT:    %phi.unr = phi i64 [ %i4, %preheader ], [ %phi.unr.ph, %outerLatch.loopexit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %lcmp.mod = icmp ne i32 %xtraiter, 0
+; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %outerLatch.loopexit
+; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    br label %header.epil
+; EPILOG:       header.epil:
+; EPILOG-NEXT:    %phi.epil = phi i64 [ %phi.unr, %header.epil.preheader ], [ %iv.next.epil, %latch.epil ]
+; EPILOG-NEXT:    %epil.iter = phi i32 [ %xtraiter, %header.epil.preheader ], [ %epil.iter.sub, %latch.epil ]
+; EPILOG-NEXT:    %i7.epil = trunc i64 %phi.epil to i32
+; EPILOG-NEXT:    br i1 true, label %latch.epil, label %innerexit.loopexit1
+; EPILOG:       latch.epil:
+; EPILOG-NEXT:    %i11.epil = add nsw i32 %i7.epil, 1
+; EPILOG-NEXT:    %innercnd.epil = icmp slt i32 %i11.epil, %trip
+; EPILOG-NEXT:    %iv.next.epil = add nuw nsw i64 %phi.epil, 1
+; EPILOG-NEXT:    %epil.iter.sub = sub i32 %epil.iter, 1
+; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i32 %epil.iter.sub, 0
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %outerLatch.loopexit.epilog-lcssa, !llvm.loop !12
+; EPILOG:       outerLatch.loopexit.epilog-lcssa:
+; EPILOG-NEXT:    br label %outerLatch.loopexit
 ; EPILOG:       outerLatch.loopexit:
 ; EPILOG-NEXT:    br label %outerLatch
 ; EPILOG:       outerLatch:
 ; EPILOG-NEXT:    br label %outerloopHdr
+; EPILOG:       latch.1:
+; EPILOG-NEXT:    %iv.next.1 = add nuw nsw i64 %iv.next, 1
+; EPILOG-NEXT:    %niter.nsub.1 = sub i32 %niter.nsub, 1
+; EPILOG-NEXT:    br i1 true, label %latch.2, label %innerexit.loopexit
+; EPILOG:       latch.2:
+; EPILOG-NEXT:    %iv.next.2 = add nuw nsw i64 %iv.next.1, 1
+; EPILOG-NEXT:    %niter.nsub.2 = sub i32 %niter.nsub.1, 1
+; EPILOG-NEXT:    br i1 true, label %latch.3, label %innerexit.loopexit
+; EPILOG:       latch.3:
+; EPILOG-NEXT:    %iv.next.3 = add nuw nsw i64 %iv.next.2, 1
+; EPILOG-NEXT:    %niter.nsub.3 = sub i32 %niter.nsub.2, 1
+; EPILOG-NEXT:    br i1 true, label %latch.4, label %innerexit.loopexit
+; EPILOG:       latch.4:
+; EPILOG-NEXT:    %iv.next.4 = add nuw nsw i64 %iv.next.3, 1
+; EPILOG-NEXT:    %niter.nsub.4 = sub i32 %niter.nsub.3, 1
+; EPILOG-NEXT:    br i1 true, label %latch.5, label %innerexit.loopexit
+; EPILOG:       latch.5:
+; EPILOG-NEXT:    %iv.next.5 = add nuw nsw i64 %iv.next.4, 1
+; EPILOG-NEXT:    %niter.nsub.5 = sub i32 %niter.nsub.4, 1
+; EPILOG-NEXT:    br i1 true, label %latch.6, label %innerexit.loopexit
+; EPILOG:       latch.6:
+; EPILOG-NEXT:    %iv.next.6 = add nuw nsw i64 %iv.next.5, 1
+; EPILOG-NEXT:    %niter.nsub.6 = sub i32 %niter.nsub.5, 1
+; EPILOG-NEXT:    br i1 true, label %latch.7, label %innerexit.loopexit
+; EPILOG:       latch.7:
+; EPILOG-NEXT:    %iv.next.7 = add nuw nsw i64 %iv.next.6, 1
+; EPILOG-NEXT:    %niter.nsub.7 = sub i32 %niter.nsub.6, 1
+; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i32 %niter.nsub.7, 0
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %header, label %outerLatch.loopexit.unr-lcssa.loopexit
 ;
 ; EPILOG-BLOCK-LABEL: @test9(
 ; EPILOG-BLOCK-NEXT:  bb:
+; EPILOG-BLOCK-NEXT:    %0 = add i32 %n, -1
+; EPILOG-BLOCK-NEXT:    %1 = add i32 %n, -2
 ; EPILOG-BLOCK-NEXT:    br label %outerloopHdr
 ; EPILOG-BLOCK:       outerloopHdr:
 ; EPILOG-BLOCK-NEXT:    %trip = add i32 %n, -1
 ; EPILOG-BLOCK-NEXT:    %outercnd = icmp slt i32 0, %trip
 ; EPILOG-BLOCK-NEXT:    br i1 %outercnd, label %preheader, label %outerLatch
 ; EPILOG-BLOCK:       preheader:
+; EPILOG-BLOCK-NEXT:    %xtraiter = and i32 %0, 1
+; EPILOG-BLOCK-NEXT:    %2 = icmp ult i32 %1, 1
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %outerLatch.loopexit.unr-lcssa, label %preheader.new
+; EPILOG-BLOCK:       preheader.new:
+; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i32 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %header
 ; EPILOG-BLOCK:       header:
-; EPILOG-BLOCK-NEXT:    %phi = phi i64 [ 0, %preheader ], [ %iv.next.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %i7 = trunc i64 %phi to i32
-; EPILOG-BLOCK-NEXT:    br i1 true, label %latch, label %innerexit.loopexit
+; EPILOG-BLOCK-NEXT:    %phi = phi i64 [ 0, %preheader.new ], [ %iv.next.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %niter = phi i32 [ %unroll_iter, %preheader.new ], [ %niter.nsub.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    br i1 true, label %latch, label %innerexit.loopexit.loopexit
+; EPILOG-BLOCK:       innerexit.loopexit.loopexit:
+; EPILOG-BLOCK-NEXT:    %trip.lcssa.ph.ph = phi i32 [ %trip, %latch ], [ %trip, %header ]
+; EPILOG-BLOCK-NEXT:    br label %innerexit.loopexit
+; EPILOG-BLOCK:       innerexit.loopexit.loopexit6:
+; EPILOG-BLOCK-NEXT:    %trip.lcssa.ph.ph7 = phi i32 [ %trip.1, %latch.15 ], [ %trip.1, %header.1 ]
+; EPILOG-BLOCK-NEXT:    br label %innerexit.loopexit
 ; EPILOG-BLOCK:       innerexit.loopexit:
-; EPILOG-BLOCK-NEXT:    %trip.lcssa.ph = phi i32 [ %trip, %header.1 ], [ %trip, %header ]
+; EPILOG-BLOCK-NEXT:    %trip.lcssa.ph = phi i32 [ %trip.lcssa.ph.ph, %innerexit.loopexit.loopexit ], [ %trip.lcssa.ph.ph7, %innerexit.loopexit.loopexit6 ]
 ; EPILOG-BLOCK-NEXT:    br label %innerexit
-; EPILOG-BLOCK:       innerexit.loopexit7:
-; EPILOG-BLOCK-NEXT:    %trip.lcssa.ph8 = phi i32 [ %trip.1, %header.1.1 ], [ %trip.1, %header.12 ]
+; EPILOG-BLOCK:       innerexit.loopexit1:
+; EPILOG-BLOCK-NEXT:    %trip.lcssa.ph2 = phi i32 [ %trip, %header.epil ], [ %trip.1, %header.epil.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %innerexit
 ; EPILOG-BLOCK:       innerexit:
-; EPILOG-BLOCK-NEXT:    %trip.lcssa = phi i32 [ %trip.lcssa.ph, %innerexit.loopexit ], [ %trip.lcssa.ph8, %innerexit.loopexit7 ]
+; EPILOG-BLOCK-NEXT:    %trip.lcssa = phi i32 [ %trip.lcssa.ph, %innerexit.loopexit ], [ %trip.lcssa.ph2, %innerexit.loopexit1 ]
 ; EPILOG-BLOCK-NEXT:    %i9 = call i8 addrspace(1)* @foo(i32 %trip.lcssa)
 ; EPILOG-BLOCK-NEXT:    ret i8 addrspace(1)* %i9
 ; EPILOG-BLOCK:       latch:
-; EPILOG-BLOCK-NEXT:    %i11 = add nuw nsw i32 %i7, 1
-; EPILOG-BLOCK-NEXT:    %innercnd = icmp slt i32 %i11, %trip
 ; EPILOG-BLOCK-NEXT:    %iv.next = add nuw nsw i64 %phi, 1
-; EPILOG-BLOCK-NEXT:    br i1 %innercnd, label %header.1, label %outerLatch.loopexit
+; EPILOG-BLOCK-NEXT:    %niter.nsub = sub i32 %niter, 1
+; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.1, label %innerexit.loopexit.loopexit
+; EPILOG-BLOCK:       outerLatch.loopexit.unr-lcssa.loopexit:
+; EPILOG-BLOCK-NEXT:    br label %outerLatch.loopexit.unr-lcssa
+; EPILOG-BLOCK:       outerLatch.loopexit.unr-lcssa:
+; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i32 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %outerLatch.loopexit
+; EPILOG-BLOCK:       header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    br label %header.epil
+; EPILOG-BLOCK:       header.epil:
+; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.epil, label %innerexit.loopexit1
+; EPILOG-BLOCK:       latch.epil:
+; EPILOG-BLOCK-NEXT:    br label %outerLatch.loopexit
 ; EPILOG-BLOCK:       outerLatch.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %outerLatch
 ; EPILOG-BLOCK:       outerLatch:
 ; EPILOG-BLOCK-NEXT:    %trip.1 = add i32 %n, -1
 ; EPILOG-BLOCK-NEXT:    %outercnd.1 = icmp slt i32 0, %trip.1
 ; EPILOG-BLOCK-NEXT:    br i1 %outercnd.1, label %preheader.1, label %outerLatch.1
-; EPILOG-BLOCK:       header.1:
-; EPILOG-BLOCK-NEXT:    %i7.1 = trunc i64 %iv.next to i32
-; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.1, label %innerexit.loopexit
 ; EPILOG-BLOCK:       latch.1:
-; EPILOG-BLOCK-NEXT:    %i11.1 = add nsw i32 %i7.1, 1
-; EPILOG-BLOCK-NEXT:    %innercnd.1 = icmp slt i32 %i11.1, %trip
 ; EPILOG-BLOCK-NEXT:    %iv.next.1 = add nuw nsw i64 %iv.next, 1
-; EPILOG-BLOCK-NEXT:    br i1 %innercnd.1, label %header, label %outerLatch.loopexit, !llvm.loop !13
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1 = sub i32 %niter.nsub, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i32 %niter.nsub.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %header, label %outerLatch.loopexit.unr-lcssa.loopexit, !llvm.loop !13
 ; EPILOG-BLOCK:       preheader.1:
-; EPILOG-BLOCK-NEXT:    br label %header.12
-; EPILOG-BLOCK:       header.12:
-; EPILOG-BLOCK-NEXT:    %phi.1 = phi i64 [ 0, %preheader.1 ], [ %iv.next.1.1, %latch.1.1 ]
-; EPILOG-BLOCK-NEXT:    %i7.11 = trunc i64 %phi.1 to i32
-; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.16, label %innerexit.loopexit7
-; EPILOG-BLOCK:       latch.16:
-; EPILOG-BLOCK-NEXT:    %i11.13 = add nuw nsw i32 %i7.11, 1
-; EPILOG-BLOCK-NEXT:    %innercnd.14 = icmp slt i32 %i11.13, %trip.1
-; EPILOG-BLOCK-NEXT:    %iv.next.15 = add nuw nsw i64 %phi.1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %innercnd.14, label %header.1.1, label %outerLatch.loopexit.1
-; EPILOG-BLOCK:       header.1.1:
-; EPILOG-BLOCK-NEXT:    %i7.1.1 = trunc i64 %iv.next.15 to i32
-; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.1.1, label %innerexit.loopexit7
+; EPILOG-BLOCK-NEXT:    %xtraiter.1 = and i32 %0, 1
+; EPILOG-BLOCK-NEXT:    %3 = icmp ult i32 %1, 1
+; EPILOG-BLOCK-NEXT:    br i1 %3, label %outerLatch.loopexit.unr-lcssa.1, label %preheader.new.1
+; EPILOG-BLOCK:       preheader.new.1:
+; EPILOG-BLOCK-NEXT:    %unroll_iter.1 = sub i32 %0, %xtraiter.1
+; EPILOG-BLOCK-NEXT:    br label %header.1
+; EPILOG-BLOCK:       header.1:
+; EPILOG-BLOCK-NEXT:    %phi.1 = phi i64 [ 0, %preheader.new.1 ], [ %iv.next.1.1, %latch.1.1 ]
+; EPILOG-BLOCK-NEXT:    %niter.1 = phi i32 [ %unroll_iter.1, %preheader.new.1 ], [ %niter.nsub.1.1, %latch.1.1 ]
+; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.15, label %innerexit.loopexit.loopexit6
+; EPILOG-BLOCK:       latch.15:
+; EPILOG-BLOCK-NEXT:    %iv.next.13 = add nuw nsw i64 %phi.1, 1
+; EPILOG-BLOCK-NEXT:    %niter.nsub.14 = sub i32 %niter.1, 1
+; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.1.1, label %innerexit.loopexit.loopexit6
 ; EPILOG-BLOCK:       latch.1.1:
-; EPILOG-BLOCK-NEXT:    %i11.1.1 = add nsw i32 %i7.1.1, 1
-; EPILOG-BLOCK-NEXT:    %innercnd.1.1 = icmp slt i32 %i11.1.1, %trip.1
-; EPILOG-BLOCK-NEXT:    %iv.next.1.1 = add nuw nsw i64 %iv.next.15, 1
-; EPILOG-BLOCK-NEXT:    br i1 %innercnd.1.1, label %header.12, label %outerLatch.loopexit.1, !llvm.loop !13
+; EPILOG-BLOCK-NEXT:    %iv.next.1.1 = add nuw nsw i64 %iv.next.13, 1
+; EPILOG-BLOCK-NEXT:    %niter.nsub.1.1 = sub i32 %niter.nsub.14, 1
+; EPILOG-BLOCK-NEXT:    %niter.ncmp.1.1 = icmp ne i32 %niter.nsub.1.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1.1, label %header.1, label %outerLatch.loopexit.unr-lcssa.loopexit.1, !llvm.loop !13
+; EPILOG-BLOCK:       outerLatch.loopexit.unr-lcssa.loopexit.1:
+; EPILOG-BLOCK-NEXT:    br label %outerLatch.loopexit.unr-lcssa.1
+; EPILOG-BLOCK:       outerLatch.loopexit.unr-lcssa.1:
+; EPILOG-BLOCK-NEXT:    %lcmp.mod.1 = icmp ne i32 %xtraiter.1, 0
+; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod.1, label %header.epil.preheader.1, label %outerLatch.loopexit.1
+; EPILOG-BLOCK:       header.epil.preheader.1:
+; EPILOG-BLOCK-NEXT:    br label %header.epil.1
+; EPILOG-BLOCK:       header.epil.1:
+; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.epil.1, label %innerexit.loopexit1
+; EPILOG-BLOCK:       latch.epil.1:
+; EPILOG-BLOCK-NEXT:    br label %outerLatch.loopexit.1
 ; EPILOG-BLOCK:       outerLatch.loopexit.1:
 ; EPILOG-BLOCK-NEXT:    br label %outerLatch.1
 ; EPILOG-BLOCK:       outerLatch.1:


        


More information about the llvm-commits mailing list