[llvm] db45746 - [LoopUnroll] Separate peeling from unrolling
    Nikita Popov via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Sat Jun  5 01:32:08 PDT 2021
    
    
  
Author: Nikita Popov
Date: 2021-06-05T10:32:00+02:00
New Revision: db45746821ab01a54f8df033991c3280c4284e3b
URL: https://github.com/llvm/llvm-project/commit/db45746821ab01a54f8df033991c3280c4284e3b
DIFF: https://github.com/llvm/llvm-project/commit/db45746821ab01a54f8df033991c3280c4284e3b.diff
LOG: [LoopUnroll] Separate peeling from unrolling
Loop peeling is currently performed as part of UnrollLoop().
Outside test scenarios, it is always performed with an unroll
count of 1. This means that unrolling doesn't actually do anything
apart from performing post-unroll simplification.
When testing, it's currently possible to specify both an explicit
peel count and an explicit unroll count. This doesn't perform any
sensible operation and may result in miscompiles, see
https://bugs.llvm.org/show_bug.cgi?id=45939.
This patch moves peeling from UnrollLoop() into tryToUnrollLoop(),
so that peeling does not also perform a subsequent unroll. We only
run the post-unroll simplifications. Specifying both an explicit
peel count and unroll count is forbidden.
In the future, we may want to support both (non-PGO) peeling a
loop and unrolling it, but this needs to be done by first performing
the peel and then recalculating unrolling heuristics on a now
possibly analyzable loop.
Differential Revision: https://reviews.llvm.org/D103362
Added: 
    llvm/test/Transforms/LoopUnroll/peel-loop-and-unroll.ll
Modified: 
    llvm/include/llvm/Transforms/Utils/UnrollLoop.h
    llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
    llvm/lib/Transforms/Utils/LoopUnroll.cpp
    llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
    llvm/test/Transforms/LoopUnroll/pr33437.ll
    llvm/test/Transforms/LoopUnroll/pr45939-peel-count-and-complete-unroll.ll
    llvm/test/Transforms/LoopUnroll/wrong_assert_in_peeling.ll
Removed: 
    
################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index 1f09f648c0df8..452402ae72685 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -71,7 +71,6 @@ struct UnrollLoopOptions {
   bool AllowRuntime;
   bool AllowExpensiveTripCount;
   unsigned TripMultiple;
-  unsigned PeelCount;
   bool UnrollRemainder;
   bool ForgetAllSCEV;
 };
diff  --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index e228b06917187..7b09d8e20fa8b 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -771,6 +771,18 @@ bool llvm::computeUnrollCount(
 
   UnrollCostEstimator UCE(*L, LoopSize);
 
+  // Use an explicit peel count that has been specified for testing. In this
+  // case it's not permitted to also specify an explicit unroll count.
+  if (PP.PeelCount) {
+    if (UnrollCount.getNumOccurrences() > 0) {
+      report_fatal_error("Cannot specify both explicit peel count and "
+                         "explicit unroll count");
+    }
+    UP.Count = 1;
+    UP.Runtime = false;
+    return true;
+  }
+
   // Check for explicit Count.
   // 1st priority is unroll count set by "unroll-count" option.
   bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
@@ -1158,6 +1170,28 @@ static LoopUnrollResult tryToUnrollLoop(
   if (TripCount && UP.Count > TripCount)
     UP.Count = TripCount;
 
+  if (PP.PeelCount) {
+    assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");
+    LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()
+                      << " with iteration count " << PP.PeelCount << "!\n");
+    ORE.emit([&]() {
+      return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
+                                L->getHeader())
+             << " peeled loop by " << ore::NV("PeelCount", PP.PeelCount)
+             << " iterations";
+    });
+
+    if (peelLoop(L, PP.PeelCount, LI, &SE, &DT, &AC, PreserveLCSSA)) {
+      simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI);
+      // If the loop was peeled, we already "used up" the profile information
+      // we had, so we don't want to unroll or peel again.
+      if (PP.PeelProfiledIterations)
+        L->setLoopAlreadyUnrolled();
+      return LoopUnrollResult::PartiallyUnrolled;
+    }
+    return LoopUnrollResult::Unmodified;
+  }
+
   // Save loop properties before it is transformed.
   MDNode *OrigLoopID = L->getLoopID();
 
@@ -1166,7 +1200,7 @@ static LoopUnrollResult tryToUnrollLoop(
   LoopUnrollResult UnrollResult = UnrollLoop(
       L,
       {UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
-       TripMultiple, PP.PeelCount, UP.UnrollRemainder, ForgetAllSCEV},
+       TripMultiple, UP.UnrollRemainder, ForgetAllSCEV},
       LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop);
   if (UnrollResult == LoopUnrollResult::Unmodified)
     return LoopUnrollResult::Unmodified;
@@ -1194,10 +1228,7 @@ static LoopUnrollResult tryToUnrollLoop(
 
   // If loop has an unroll count pragma or unrolled by explicitly set count
   // mark loop as unrolled to prevent unrolling beyond that requested.
-  // If the loop was peeled, we already "used up" the profile information
-  // we had, so we don't want to unroll or peel again.
-  if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
-      (IsCountSetExplicitly || (PP.PeelProfiledIterations && PP.PeelCount)))
+  if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
     L->setLoopAlreadyUnrolled();
 
   return UnrollResult;
diff  --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index fe0833aee69c8..f7590accb31d5 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -59,7 +59,6 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopPeel.h"
 #include "llvm/Transforms/Utils/LoopSimplify.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
@@ -259,9 +258,6 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
 /// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
 /// AllowExpensiveTripCount is false.
 ///
-/// If we want to perform PGO-based loop peeling, PeelCount is set to the
-/// number of iterations we want to peel off.
-///
 /// The LoopInfo Analysis that is passed will be kept consistent.
 ///
 /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
@@ -311,7 +307,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
     ULO.Count = ULO.TripCount;
 
   // Don't enter the unroll code if there is nothing to do.
-  if (ULO.TripCount == 0 && ULO.Count < 2 && ULO.PeelCount == 0) {
+  if (ULO.TripCount == 0 && ULO.Count < 2) {
     LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
     return LoopUnrollResult::Unmodified;
   }
@@ -320,25 +316,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
   assert(ULO.TripMultiple > 0);
   assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0);
 
-
-  bool Peeled = false;
-  if (ULO.PeelCount) {
-    Peeled = peelLoop(L, ULO.PeelCount, LI, SE, DT, AC, PreserveLCSSA);
-
-    // Successful peeling may result in a change in the loop preheader/trip
-    // counts. If we later unroll the loop, we want these to be updated.
-    if (Peeled) {
-      // According to our guards and profitability checks the only
-      // meaningful exit should be latch block. Other exits go to deopt,
-      // so we do not worry about them.
-      BasicBlock *ExitingBlock = L->getLoopLatch();
-      assert(ExitingBlock && "Loop without exiting block?");
-      assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?");
-      ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
-      ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
-    }
-  }
-
   // Are we eliminating the loop control altogether?  Note that we can know
   // we're eliminating the backedge without knowing exactly which iteration
   // of the unrolled body exits.
@@ -350,10 +327,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
   bool RuntimeTripCount =
       (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime);
 
-  assert((!RuntimeTripCount || !ULO.PeelCount) &&
-         "Did not expect runtime trip-count unrolling "
-         "and peeling for the same loop");
-
   // All these values should be taken only after peeling because they might have
   // changed.
   BasicBlock *Preheader = L->getLoopPreheader();
@@ -396,9 +369,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
   else if (BasicBlock *ExitingBlock = L->getExitingBlock())
     ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
   if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
-    // If the peeling guard is changed this assert may be relaxed or even
-    // deleted.
-    assert(!Peeled && "Peeling guard changed!");
     LLVM_DEBUG(
         dbgs() << "Can't unroll; a conditional latch must exit the loop");
     return LoopUnrollResult::Unmodified;
@@ -473,16 +443,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
                << "completely unrolled loop with "
                << NV("UnrollCount", ULO.TripCount) << " iterations";
       });
-  } else if (ULO.PeelCount) {
-    LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName()
-                      << " with iteration count " << ULO.PeelCount << "!\n");
-    if (ORE)
-      ORE->emit([&]() {
-        return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
-                                  L->getHeader())
-               << " peeled loop by " << NV("PeelCount", ULO.PeelCount)
-               << " iterations";
-      });
   } else {
     auto DiagBuilder = [&]() {
       OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
@@ -835,8 +795,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
 
   // At this point, the code is well formed.  We now simplify the unrolled loop,
   // doing constant propagation and dead code elimination as we go.
-  simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI,
-                          SE, DT, AC, TTI);
+  simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC,
+                          TTI);
 
   NumCompletelyUnrolled += CompletelyUnroll;
   ++NumUnrolled;
diff  --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 767b0728cdf35..5281057270714 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -987,7 +987,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
                    {/*Count*/ Count - 1, /*TripCount*/ Count - 1,
                     /*Force*/ false, /*AllowRuntime*/ false,
                     /*AllowExpensiveTripCount*/ false, /*TripMultiple*/ 1,
-                    /*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV},
+                    /*UnrollRemainder*/ false, ForgetAllSCEV},
                    LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
   }
 
diff  --git a/llvm/test/Transforms/LoopUnroll/peel-loop-and-unroll.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-and-unroll.ll
new file mode 100644
index 0000000000000..e855ee8888c58
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-and-unroll.ll
@@ -0,0 +1,22 @@
+; RUN: not --crash opt -loop-unroll -unroll-peel-count=2 -unroll-count=2 -S < %s 2>&1 | FileCheck %s
+
+; CHECK: LLVM ERROR: Cannot specify both explicit peel count and explicit unroll count
+
+@a = global [8 x i32] zeroinitializer, align 16
+
+define void @test1() {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 %indvars.iv
+  %0 = trunc i64 %indvars.iv to i32
+  store i32 %0, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 8
+  br i1 %exitcond, label %for.body, label %for.exit
+
+for.exit:                        ; preds = %for.body
+  ret void
+}
diff  --git a/llvm/test/Transforms/LoopUnroll/pr33437.ll b/llvm/test/Transforms/LoopUnroll/pr33437.ll
index 55c17e0622850..7bf2a0b6d9971 100644
--- a/llvm/test/Transforms/LoopUnroll/pr33437.ll
+++ b/llvm/test/Transforms/LoopUnroll/pr33437.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -loop-unroll -unroll-count=4 -unroll-peel-count=1 < %s | FileCheck %s
+; RUN: opt -S -loop-unroll -unroll-peel-count=1 < %s | FileCheck %s
 
 declare zeroext i8 @patatino()
 
@@ -7,6 +7,8 @@ define fastcc void @tinky() {
 ; CHECK-LABEL: @tinky(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       loopexit.loopexit:
+; CHECK-NEXT:    br label [[LOOPEXIT:%.*]]
 ; CHECK:       loopexit:
 ; CHECK-NEXT:    ret void
 ; CHECK:       next:
@@ -15,7 +17,7 @@ define fastcc void @tinky() {
 ; CHECK-NEXT:    br label [[LOOP_PEEL:%.*]]
 ; CHECK:       loop.peel:
 ; CHECK-NEXT:    [[CALL593_PEEL:%.*]] = tail call zeroext i8 @patatino()
-; CHECK-NEXT:    br i1 false, label [[LOOP_PEEL_NEXT:%.*]], label [[LOOPEXIT:%.*]]
+; CHECK-NEXT:    br i1 false, label [[LOOP_PEEL_NEXT:%.*]], label [[LOOPEXIT]]
 ; CHECK:       loop.peel.next:
 ; CHECK-NEXT:    br label [[LOOP_PEEL_NEXT1:%.*]]
 ; CHECK:       loop.peel.next1:
@@ -24,7 +26,7 @@ define fastcc void @tinky() {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[CALL593:%.*]] = tail call zeroext i8 @patatino()
-; CHECK-NEXT:    br label [[LOOPEXIT]]
+; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ;
 entry:
   br label %next
@@ -66,19 +68,11 @@ define void @tinky2() {
 ; CHECK:       next.peel.newph:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[B:%.*]] = phi i32 [ [[B_NEXT_PEEL]], [[NEXT_PEEL_NEWPH]] ], [ [[B_NEXT_3:%.*]], [[LOOP_2:%.*]] ]
+; CHECK-NEXT:    [[B:%.*]] = phi i32 [ [[B_NEXT_PEEL]], [[NEXT_PEEL_NEWPH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[CALL593:%.*]] = tail call zeroext i8 @patatino()
-; CHECK-NEXT:    [[B_NEXT:%.*]] = add nuw nsw i32 [[B]], 1
-; CHECK-NEXT:    [[CALL593_1:%.*]] = tail call zeroext i8 @patatino()
-; CHECK-NEXT:    [[B_NEXT_1:%.*]] = add nuw nsw i32 [[B_NEXT]], 1
-; CHECK-NEXT:    [[COND_1:%.*]] = icmp ne i32 [[B_NEXT]], 30
-; CHECK-NEXT:    br i1 [[COND_1]], label [[LOOP_2]], label [[LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop !0
-; CHECK:       loop.2:
-; CHECK-NEXT:    [[CALL593_2:%.*]] = tail call zeroext i8 @patatino()
-; CHECK-NEXT:    [[B_NEXT_2:%.*]] = add nuw nsw i32 [[B_NEXT_1]], 1
-; CHECK-NEXT:    [[CALL593_3:%.*]] = tail call zeroext i8 @patatino()
-; CHECK-NEXT:    [[B_NEXT_3]] = add nuw nsw i32 [[B_NEXT_2]], 1
-; CHECK-NEXT:    br label [[LOOP]], !llvm.loop !2
+; CHECK-NEXT:    [[B_NEXT]] = add nuw nsw i32 [[B]], 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[B]], 30
+; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
 ;
 entry:
   br label %next
diff  --git a/llvm/test/Transforms/LoopUnroll/pr45939-peel-count-and-complete-unroll.ll b/llvm/test/Transforms/LoopUnroll/pr45939-peel-count-and-complete-unroll.ll
index 0b9ea76b3c80d..654b94c2434d6 100644
--- a/llvm/test/Transforms/LoopUnroll/pr45939-peel-count-and-complete-unroll.ll
+++ b/llvm/test/Transforms/LoopUnroll/pr45939-peel-count-and-complete-unroll.ll
@@ -1,7 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -loop-unroll -unroll-peel-count=2 -S %s | FileCheck --check-prefix=PEEL2 %s
 ; RUN: opt -loop-unroll -unroll-peel-count=8 -S %s | FileCheck --check-prefix=PEEL8 %s
-; RUN: opt -loop-unroll -unroll-peel-count=2 -unroll-count=2 -S %s | FileCheck --check-prefix=PEEL2UNROLL2 %s
 
 ; Test case for PR45939. Make sure unroll count is adjusted when loop is peeled and unrolled.
 
@@ -36,47 +35,17 @@ define void @test1() {
 ; PEEL2:       entry.peel.newph:
 ; PEEL2-NEXT:    br label [[FOR_BODY:%.*]]
 ; PEEL2:       for.body:
-; PEEL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL4]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY_6:%.*]] ]
+; PEEL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL4]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; PEEL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV]]
 ; PEEL2-NEXT:    [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; PEEL2-NEXT:    store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4
-; PEEL2-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; PEEL2-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT]]
-; PEEL2-NEXT:    [[TMP3:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; PEEL2-NEXT:    store i32 [[TMP3]], i32* [[ARRAYIDX_1]], align 4
-; PEEL2-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
-; PEEL2-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_1]]
-; PEEL2-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32
-; PEEL2-NEXT:    store i32 [[TMP4]], i32* [[ARRAYIDX_2]], align 4
-; PEEL2-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
-; PEEL2-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_2]]
-; PEEL2-NEXT:    [[TMP5:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32
-; PEEL2-NEXT:    store i32 [[TMP5]], i32* [[ARRAYIDX_3]], align 4
-; PEEL2-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
-; PEEL2-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_3]]
-; PEEL2-NEXT:    [[TMP6:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32
-; PEEL2-NEXT:    store i32 [[TMP6]], i32* [[ARRAYIDX_4]], align 4
-; PEEL2-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
-; PEEL2-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_4]]
-; PEEL2-NEXT:    [[TMP7:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32
-; PEEL2-NEXT:    store i32 [[TMP7]], i32* [[ARRAYIDX_5]], align 4
-; PEEL2-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
-; PEEL2-NEXT:    [[EXITCOND_5:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_5]], 8
-; PEEL2-NEXT:    br i1 [[EXITCOND_5]], label [[FOR_BODY_6]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; PEEL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; PEEL2-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 8
+; PEEL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; PEEL2:       for.exit.loopexit:
 ; PEEL2-NEXT:    br label [[FOR_EXIT]]
 ; PEEL2:       for.exit:
 ; PEEL2-NEXT:    ret void
-; PEEL2:       for.body.6:
-; PEEL2-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_5]]
-; PEEL2-NEXT:    [[TMP8:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32
-; PEEL2-NEXT:    store i32 [[TMP8]], i32* [[ARRAYIDX_6]], align 4
-; PEEL2-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
-; PEEL2-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_6]]
-; PEEL2-NEXT:    [[TMP9:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32
-; PEEL2-NEXT:    store i32 [[TMP9]], i32* [[ARRAYIDX_7]], align 4
-; PEEL2-NEXT:    [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV_NEXT_6]], 1
-; PEEL2-NEXT:    br label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ;
 ; PEEL8-LABEL: @test1(
 ; PEEL8-NEXT:  entry:
@@ -160,102 +129,16 @@ define void @test1() {
 ; PEEL8:       entry.peel.newph:
 ; PEEL8-NEXT:    br label [[FOR_BODY:%.*]]
 ; PEEL8:       for.body:
-; PEEL8-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL34]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY_7:%.*]] ]
+; PEEL8-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL34]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; PEEL8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV]]
 ; PEEL8-NEXT:    [[TMP8:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; PEEL8-NEXT:    store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4
-; PEEL8-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; PEEL8-NEXT:    br i1 true, label [[FOR_BODY_1:%.*]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; PEEL8-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; PEEL8-NEXT:    br i1 true, label [[FOR_BODY]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; PEEL8:       for.exit.loopexit:
 ; PEEL8-NEXT:    br label [[FOR_EXIT]]
 ; PEEL8:       for.exit:
 ; PEEL8-NEXT:    ret void
-; PEEL8:       for.body.1:
-; PEEL8-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT]]
-; PEEL8-NEXT:    [[TMP9:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; PEEL8-NEXT:    store i32 [[TMP9]], i32* [[ARRAYIDX_1]], align 4
-; PEEL8-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
-; PEEL8-NEXT:    br i1 true, label [[FOR_BODY_2:%.*]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]]
-; PEEL8:       for.body.2:
-; PEEL8-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_1]]
-; PEEL8-NEXT:    [[TMP10:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32
-; PEEL8-NEXT:    store i32 [[TMP10]], i32* [[ARRAYIDX_2]], align 4
-; PEEL8-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
-; PEEL8-NEXT:    br i1 true, label [[FOR_BODY_3:%.*]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]]
-; PEEL8:       for.body.3:
-; PEEL8-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_2]]
-; PEEL8-NEXT:    [[TMP11:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32
-; PEEL8-NEXT:    store i32 [[TMP11]], i32* [[ARRAYIDX_3]], align 4
-; PEEL8-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
-; PEEL8-NEXT:    br i1 true, label [[FOR_BODY_4:%.*]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]]
-; PEEL8:       for.body.4:
-; PEEL8-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_3]]
-; PEEL8-NEXT:    [[TMP12:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32
-; PEEL8-NEXT:    store i32 [[TMP12]], i32* [[ARRAYIDX_4]], align 4
-; PEEL8-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
-; PEEL8-NEXT:    br i1 true, label [[FOR_BODY_5:%.*]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]]
-; PEEL8:       for.body.5:
-; PEEL8-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_4]]
-; PEEL8-NEXT:    [[TMP13:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32
-; PEEL8-NEXT:    store i32 [[TMP13]], i32* [[ARRAYIDX_5]], align 4
-; PEEL8-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
-; PEEL8-NEXT:    br i1 true, label [[FOR_BODY_6:%.*]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]]
-; PEEL8:       for.body.6:
-; PEEL8-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_5]]
-; PEEL8-NEXT:    [[TMP14:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32
-; PEEL8-NEXT:    store i32 [[TMP14]], i32* [[ARRAYIDX_6]], align 4
-; PEEL8-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
-; PEEL8-NEXT:    br i1 true, label [[FOR_BODY_7]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP0]]
-; PEEL8:       for.body.7:
-; PEEL8-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_6]]
-; PEEL8-NEXT:    [[TMP15:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32
-; PEEL8-NEXT:    store i32 [[TMP15]], i32* [[ARRAYIDX_7]], align 4
-; PEEL8-NEXT:    [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV_NEXT_6]], 1
-; PEEL8-NEXT:    br i1 true, label [[FOR_BODY]], label [[FOR_EXIT_LOOPEXIT]], !llvm.loop [[LOOP2:![0-9]+]]
-;
-; PEEL2UNROLL2-LABEL: @test1(
-; PEEL2UNROLL2-NEXT:  entry:
-; PEEL2UNROLL2-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
-; PEEL2UNROLL2:       for.body.peel.begin:
-; PEEL2UNROLL2-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
-; PEEL2UNROLL2:       for.body.peel:
-; PEEL2UNROLL2-NEXT:    [[ARRAYIDX_PEEL:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 0
-; PEEL2UNROLL2-NEXT:    [[TMP0:%.*]] = trunc i64 0 to i32
-; PEEL2UNROLL2-NEXT:    store i32 [[TMP0]], i32* [[ARRAYIDX_PEEL]], align 4
-; PEEL2UNROLL2-NEXT:    [[INDVARS_IV_NEXT_PEEL:%.*]] = add nuw nsw i64 0, 1
-; PEEL2UNROLL2-NEXT:    [[EXITCOND_PEEL:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL]], 8
-; PEEL2UNROLL2-NEXT:    br i1 [[EXITCOND_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_EXIT:%.*]]
-; PEEL2UNROLL2:       for.body.peel.next:
-; PEEL2UNROLL2-NEXT:    br label [[FOR_BODY_PEEL2:%.*]]
-; PEEL2UNROLL2:       for.body.peel2:
-; PEEL2UNROLL2-NEXT:    [[ARRAYIDX_PEEL3:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_PEEL]]
-; PEEL2UNROLL2-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV_NEXT_PEEL]] to i32
-; PEEL2UNROLL2-NEXT:    store i32 [[TMP1]], i32* [[ARRAYIDX_PEEL3]], align 4
-; PEEL2UNROLL2-NEXT:    [[INDVARS_IV_NEXT_PEEL4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_PEEL]], 1
-; PEEL2UNROLL2-NEXT:    [[EXITCOND_PEEL5:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL4]], 8
-; PEEL2UNROLL2-NEXT:    br i1 [[EXITCOND_PEEL5]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_EXIT]]
-; PEEL2UNROLL2:       for.body.peel.next1:
-; PEEL2UNROLL2-NEXT:    br label [[FOR_BODY_PEEL_NEXT6:%.*]]
-; PEEL2UNROLL2:       for.body.peel.next6:
-; PEEL2UNROLL2-NEXT:    br label [[ENTRY_PEEL_NEWPH:%.*]]
-; PEEL2UNROLL2:       entry.peel.newph:
-; PEEL2UNROLL2-NEXT:    br label [[FOR_BODY:%.*]]
-; PEEL2UNROLL2:       for.body:
-; PEEL2UNROLL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL4]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT_1:%.*]], [[FOR_BODY]] ]
-; PEEL2UNROLL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV]]
-; PEEL2UNROLL2-NEXT:    [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; PEEL2UNROLL2-NEXT:    store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4
-; PEEL2UNROLL2-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; PEEL2UNROLL2-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT]]
-; PEEL2UNROLL2-NEXT:    [[TMP3:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; PEEL2UNROLL2-NEXT:    store i32 [[TMP3]], i32* [[ARRAYIDX_1]], align 4
-; PEEL2UNROLL2-NEXT:    [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
-; PEEL2UNROLL2-NEXT:    [[EXITCOND_1:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_1]], 8
-; PEEL2UNROLL2-NEXT:    br i1 [[EXITCOND_1]], label [[FOR_BODY]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
-; PEEL2UNROLL2:       for.exit.loopexit:
-; PEEL2UNROLL2-NEXT:    br label [[FOR_EXIT]]
-; PEEL2UNROLL2:       for.exit:
-; PEEL2UNROLL2-NEXT:    ret void
 ;
 entry:
   br label %for.body
diff  --git a/llvm/test/Transforms/LoopUnroll/wrong_assert_in_peeling.ll b/llvm/test/Transforms/LoopUnroll/wrong_assert_in_peeling.ll
index c618b290ed26d..78f459f36f9c7 100644
--- a/llvm/test/Transforms/LoopUnroll/wrong_assert_in_peeling.ll
+++ b/llvm/test/Transforms/LoopUnroll/wrong_assert_in_peeling.ll
@@ -39,13 +39,13 @@ define i32 @test() {
 ; CHECK-NEXT:    [[TMP4]] = add nsw i32 [[TMP3]], [[TMP]]
 ; CHECK-NEXT:    br label [[BB5:%.*]]
 ; CHECK:       bb5:
-; CHECK-NEXT:    br i1 false, label [[BB7:%.*]], label [[BB15_LOOPEXIT:%.*]]
+; CHECK-NEXT:    br i1 undef, label [[BB7:%.*]], label [[BB15_LOOPEXIT:%.*]]
 ; CHECK:       bb7:
 ; CHECK-NEXT:    br i1 undef, label [[BB10:%.*]], label [[BB10]]
 ; CHECK:       bb10:
-; CHECK-NEXT:    br i1 false, label [[BB12]], label [[BB17_LOOPEXIT:%.*]]
+; CHECK-NEXT:    br i1 undef, label [[BB12]], label [[BB17_LOOPEXIT:%.*]]
 ; CHECK:       bb12:
-; CHECK-NEXT:    br i1 false, label [[BB13_LOOPEXIT:%.*]], label [[BB2]], !llvm.loop !0
+; CHECK-NEXT:    br i1 false, label [[BB13_LOOPEXIT:%.*]], label [[BB2]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       bb13.loopexit:
 ; CHECK-NEXT:    br label [[BB13]]
 ; CHECK:       bb13:
        
    
    
More information about the llvm-commits
mailing list