[llvm] 8fdd7c2 - [LoopUnroll] Clamp unroll count to MaxTripCount

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 7 12:20:16 PDT 2021


Author: Nikita Popov
Date: 2021-06-07T21:08:42+02:00
New Revision: 8fdd7c2ff16da370e28ef1b22e400d57a541484f

URL: https://github.com/llvm/llvm-project/commit/8fdd7c2ff16da370e28ef1b22e400d57a541484f
DIFF: https://github.com/llvm/llvm-project/commit/8fdd7c2ff16da370e28ef1b22e400d57a541484f.diff

LOG: [LoopUnroll] Clamp unroll count to MaxTripCount

Unrolling with more iterations than MaxTripCount is pointless, as
those iterations can never be executed. As such, we clamp ULO.Count
to MaxTripCount if it is known. This means we no longer need to
consider iterations after MaxTripCount for exit folding, and the
CompletelyUnroll flag becomes independent of ULO.TripCount.

Differential Revision: https://reviews.llvm.org/D103748

Added: 
    

Modified: 
    llvm/lib/Transforms/Utils/LoopUnroll.cpp
    llvm/test/Transforms/LoopUnroll/multiple-exits.ll
    llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll
    llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
    llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index f7590accb31d..b3658fbe9e1c 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -301,11 +301,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
   if (ULO.TripMultiple != 1)
     LLVM_DEBUG(dbgs() << "  Trip Multiple = " << ULO.TripMultiple << "\n");
 
-  // Effectively "DCE" unrolled iterations that are beyond the tripcount
-  // and will never be executed.
-  if (ULO.TripCount != 0 && ULO.Count > ULO.TripCount)
-    ULO.Count = ULO.TripCount;
-
   // Don't enter the unroll code if there is nothing to do.
   if (ULO.TripCount == 0 && ULO.Count < 2) {
     LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
@@ -316,17 +311,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
   assert(ULO.TripMultiple > 0);
   assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0);
 
-  // Are we eliminating the loop control altogether?  Note that we can know
-  // we're eliminating the backedge without knowing exactly which iteration
-  // of the unrolled body exits.
-  const bool CompletelyUnroll = ULO.Count == ULO.TripCount;
-
-  // We assume a run-time trip count if the compiler cannot
-  // figure out the loop trip count and the unroll-runtime
-  // flag is specified.
-  bool RuntimeTripCount =
-      (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime);
-
   // All these values should be taken only after peeling because they might have
   // changed.
   BasicBlock *Preheader = L->getLoopPreheader();
@@ -336,6 +320,27 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
   L->getExitBlocks(ExitBlocks);
   std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
 
+  const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
+  const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
+
+  // Effectively "DCE" unrolled iterations that are beyond the max tripcount
+  // and will never be executed.
+  if (MaxTripCount && ULO.Count > MaxTripCount)
+    ULO.Count = MaxTripCount;
+
+  // Are we eliminating the loop control altogether?  Note that we can know
+  // we're eliminating the backedge without knowing exactly which iteration
+  // of the unrolled body exits.
+  const bool CompletelyUnroll = ULO.Count == MaxTripCount;
+
+  const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero;
+
+  // We assume a run-time trip count if the compiler cannot
+  // figure out the loop trip count and the unroll-runtime
+  // flag is specified.
+  bool RuntimeTripCount =
+      !CompletelyUnroll && ULO.TripCount == 0 && ULO.AllowRuntime;
+
   // Go through all exits of L and see if there are any phi-nodes there. We just
   // conservatively assume that they're inserted to preserve LCSSA form, which
   // means that complete unrolling might break this form. We need to either fix
@@ -347,11 +352,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
       any_of(ExitBlocks,
              [](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); });
 
-  const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
-  const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
-
-  const bool PreserveOnlyFirst = ULO.Count == MaxTripCount && MaxOrZero;
-
   // The current loop unroll pass can unroll loops that have
   // (1) single latch; and
   // (2a) latch is unconditional; or
@@ -728,8 +728,6 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
         // Complete (but possibly inexact) unrolling
         if (j == 0)
           return true;
-        if (MaxTripCount && j >= MaxTripCount)
-          return false;
         // Warning: ExactTripCount is the trip count of the exiting
         // block which ends in ExitingBI, not neccessarily the loop.
         if (ExactTripCount && j != ExactTripCount)

diff  --git a/llvm/test/Transforms/LoopUnroll/multiple-exits.ll b/llvm/test/Transforms/LoopUnroll/multiple-exits.ll
index 39dfe32d24bf..8a3f51a1fb94 100644
--- a/llvm/test/Transforms/LoopUnroll/multiple-exits.ll
+++ b/llvm/test/Transforms/LoopUnroll/multiple-exits.ll
@@ -3,7 +3,6 @@
 
 declare void @bar()
 
-; TODO: We should unroll by 10, not 20 here
 define void @test1() {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
@@ -55,46 +54,6 @@ define void @test1() {
 ; CHECK-NEXT:    br i1 false, label [[LATCH_10:%.*]], label [[EXIT]]
 ; CHECK:       latch.10:
 ; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    br i1 false, label [[LATCH_11:%.*]], label [[EXIT]]
-; CHECK:       latch.11:
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    br i1 false, label [[LATCH_12:%.*]], label [[EXIT]]
-; CHECK:       latch.12:
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    br i1 false, label [[LATCH_13:%.*]], label [[EXIT]]
-; CHECK:       latch.13:
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    br i1 false, label [[LATCH_14:%.*]], label [[EXIT]]
-; CHECK:       latch.14:
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    br i1 false, label [[LATCH_15:%.*]], label [[EXIT]]
-; CHECK:       latch.15:
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    br i1 false, label [[LATCH_16:%.*]], label [[EXIT]]
-; CHECK:       latch.16:
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    br i1 false, label [[LATCH_17:%.*]], label [[EXIT]]
-; CHECK:       latch.17:
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    br i1 false, label [[LATCH_18:%.*]], label [[EXIT]]
-; CHECK:       latch.18:
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    br i1 false, label [[LATCH_19:%.*]], label [[EXIT]]
-; CHECK:       latch.19:
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    call void @bar()
-; CHECK-NEXT:    br i1 false, label [[LATCH_20:%.*]], label [[EXIT]]
-; CHECK:       latch.20:
-; CHECK-NEXT:    call void @bar()
 ; CHECK-NEXT:    br label [[EXIT]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll b/llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll
index e8dfa0a1a5be..3076084c5330 100644
--- a/llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll
+++ b/llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll
@@ -165,43 +165,35 @@ define void @test3(i32* noalias %A, i1 %cond) {
 ; CHECK-NEXT:    call void @bar(i32 [[TMP0]])
 ; CHECK-NEXT:    br label [[FOR_HEADER:%.*]]
 ; CHECK:       for.header:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[DOTPRE_3:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE_3:%.*]] ]
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INC_3:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE_3]] ]
-; CHECK-NEXT:    call void @bar(i32 [[TMP1]])
+; CHECK-NEXT:    call void @bar(i32 [[TMP0]])
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    br i1 true, label [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]], label [[FOR_END]]
 ; CHECK:       for.body.for.body_crit_edge:
-; CHECK-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC]]
+; CHECK-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1
 ; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
 ; CHECK-NEXT:    call void @bar(i32 [[DOTPRE]])
 ; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY_1:%.*]], label [[FOR_END]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body.1:
-; CHECK-NEXT:    [[INC_1:%.*]] = add nuw nsw i64 [[INC]], 1
 ; CHECK-NEXT:    br i1 true, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_1:%.*]], label [[FOR_END]]
 ; CHECK:       for.body.for.body_crit_edge.1:
-; CHECK-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_1]]
+; CHECK-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
 ; CHECK-NEXT:    [[DOTPRE_1:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_1]], align 4
 ; CHECK-NEXT:    call void @bar(i32 [[DOTPRE_1]])
 ; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY_2:%.*]], label [[FOR_END]]
 ; CHECK:       for.body.2:
-; CHECK-NEXT:    [[INC_2:%.*]] = add nuw nsw i64 [[INC_1]], 1
 ; CHECK-NEXT:    br i1 true, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_2:%.*]], label [[FOR_END]]
 ; CHECK:       for.body.for.body_crit_edge.2:
-; CHECK-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_2]]
+; CHECK-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
 ; CHECK-NEXT:    [[DOTPRE_2:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_2]], align 4
 ; CHECK-NEXT:    call void @bar(i32 [[DOTPRE_2]])
 ; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY_3:%.*]], label [[FOR_END]]
 ; CHECK:       for.body.3:
-; CHECK-NEXT:    [[INC_3]] = add nuw nsw i64 [[INC_2]], 1
-; CHECK-NEXT:    br i1 false, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_3]], label [[FOR_END]]
+; CHECK-NEXT:    br i1 false, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_3:%.*]], label [[FOR_END]]
 ; CHECK:       for.body.for.body_crit_edge.3:
-; CHECK-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_3]]
-; CHECK-NEXT:    [[DOTPRE_3]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_3]], align 4
-; CHECK-NEXT:    br label [[FOR_HEADER]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    unreachable
 ;
 entry:
   %0 = load i32, i32* %A, align 4
@@ -243,7 +235,7 @@ define void @test4(i32 %arg) {
 ; CHECK:       bb1.2:
 ; CHECK-NEXT:    br i1 false, label [[BB4]], label [[BB1_3:%.*]]
 ; CHECK:       bb1.3:
-; CHECK-NEXT:    br i1 false, label [[BB4]], label [[BB1]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT:    br i1 false, label [[BB4]], label [[BB1]], !llvm.loop [[LOOP2:![0-9]+]]
 ;
 bb:
   br label %bb1

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
index a1c8ffedaeb5..07fcefc621d4 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
@@ -16,123 +16,50 @@ define i3 @test(i3* %a, i3 %n) {
 ; UNROLL-16:       for.body.preheader:
 ; UNROLL-16-NEXT:    br label [[FOR_BODY:%.*]]
 ; UNROLL-16:       for.body:
-; UNROLL-16-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT_15:%.*]], [[FOR_BODY_15:%.*]] ]
-; UNROLL-16-NEXT:    [[SUM_02:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[ADD_15:%.*]], [[FOR_BODY_15]] ]
-; UNROLL-16-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i3, i3* [[A:%.*]], i64 [[INDVARS_IV]]
-; UNROLL-16-NEXT:    [[TMP0:%.*]] = load i3, i3* [[ARRAYIDX]], align 1
-; UNROLL-16-NEXT:    [[ADD:%.*]] = add nsw i3 [[TMP0]], [[SUM_02]]
-; UNROLL-16-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; UNROLL-16-NEXT:    [[TMP0:%.*]] = load i3, i3* [[A:%.*]], align 1
 ; UNROLL-16-NEXT:    [[EXITCOND:%.*]] = icmp eq i3 1, [[N]]
 ; UNROLL-16-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_1:%.*]]
 ; UNROLL-16:       for.end.loopexit:
-; UNROLL-16-NEXT:    [[ADD_LCSSA:%.*]] = phi i3 [ [[ADD]], [[FOR_BODY]] ], [ [[ADD_1:%.*]], [[FOR_BODY_1]] ], [ [[ADD_2:%.*]], [[FOR_BODY_2:%.*]] ], [ [[ADD_3:%.*]], [[FOR_BODY_3:%.*]] ], [ [[ADD_4:%.*]], [[FOR_BODY_4:%.*]] ], [ [[ADD_5:%.*]], [[FOR_BODY_5:%.*]] ], [ [[ADD_6:%.*]], [[FOR_BODY_6:%.*]] ], [ [[ADD_7:%.*]], [[FOR_BODY_7:%.*]] ], [ [[ADD_8:%.*]], [[FOR_BODY_8:%.*]] ], [ [[ADD_9:%.*]], [[FOR_BODY_9:%.*]] ], [ [[ADD_10:%.*]], [[FOR_BODY_10:%.*]] ], [ [[ADD_11:%.*]], [[FOR_BODY_11:%.*]] ], [ [[ADD_12:%.*]], [[FOR_BODY_12:%.*]] ], [ [[ADD_13:%.*]], [[FOR_BODY_13:%.*]] ], [ [[ADD_14:%.*]], [[FOR_BODY_14:%.*]] ], [ [[ADD_15]], [[FOR_BODY_15]] ]
+; UNROLL-16-NEXT:    [[ADD_LCSSA:%.*]] = phi i3 [ [[TMP0]], [[FOR_BODY]] ], [ [[ADD_1:%.*]], [[FOR_BODY_1]] ], [ [[ADD_2:%.*]], [[FOR_BODY_2:%.*]] ], [ [[ADD_3:%.*]], [[FOR_BODY_3:%.*]] ], [ [[ADD_4:%.*]], [[FOR_BODY_4:%.*]] ], [ [[ADD_5:%.*]], [[FOR_BODY_5:%.*]] ], [ [[ADD_6:%.*]], [[FOR_BODY_6:%.*]] ]
 ; UNROLL-16-NEXT:    br label [[FOR_END]]
 ; UNROLL-16:       for.end:
 ; UNROLL-16-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i3 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_END_LOOPEXIT]] ]
 ; UNROLL-16-NEXT:    ret i3 [[SUM_0_LCSSA]]
 ; UNROLL-16:       for.body.1:
-; UNROLL-16-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT]]
+; UNROLL-16-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 1
 ; UNROLL-16-NEXT:    [[TMP1:%.*]] = load i3, i3* [[ARRAYIDX_1]], align 1
-; UNROLL-16-NEXT:    [[ADD_1]] = add nsw i3 [[TMP1]], [[ADD]]
-; UNROLL-16-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
+; UNROLL-16-NEXT:    [[ADD_1]] = add nsw i3 [[TMP1]], [[TMP0]]
 ; UNROLL-16-NEXT:    [[EXITCOND_1:%.*]] = icmp eq i3 2, [[N]]
 ; UNROLL-16-NEXT:    br i1 [[EXITCOND_1]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_2]]
 ; UNROLL-16:       for.body.2:
-; UNROLL-16-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; UNROLL-16-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 2
 ; UNROLL-16-NEXT:    [[TMP2:%.*]] = load i3, i3* [[ARRAYIDX_2]], align 1
 ; UNROLL-16-NEXT:    [[ADD_2]] = add nsw i3 [[TMP2]], [[ADD_1]]
-; UNROLL-16-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
 ; UNROLL-16-NEXT:    [[EXITCOND_2:%.*]] = icmp eq i3 3, [[N]]
 ; UNROLL-16-NEXT:    br i1 [[EXITCOND_2]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_3]]
 ; UNROLL-16:       for.body.3:
-; UNROLL-16-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; UNROLL-16-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 3
 ; UNROLL-16-NEXT:    [[TMP3:%.*]] = load i3, i3* [[ARRAYIDX_3]], align 1
 ; UNROLL-16-NEXT:    [[ADD_3]] = add nsw i3 [[TMP3]], [[ADD_2]]
-; UNROLL-16-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
 ; UNROLL-16-NEXT:    [[EXITCOND_3:%.*]] = icmp eq i3 -4, [[N]]
 ; UNROLL-16-NEXT:    br i1 [[EXITCOND_3]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_4]]
 ; UNROLL-16:       for.body.4:
-; UNROLL-16-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; UNROLL-16-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 4
 ; UNROLL-16-NEXT:    [[TMP4:%.*]] = load i3, i3* [[ARRAYIDX_4]], align 1
 ; UNROLL-16-NEXT:    [[ADD_4]] = add nsw i3 [[TMP4]], [[ADD_3]]
-; UNROLL-16-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
 ; UNROLL-16-NEXT:    [[EXITCOND_4:%.*]] = icmp eq i3 -3, [[N]]
 ; UNROLL-16-NEXT:    br i1 [[EXITCOND_4]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_5]]
 ; UNROLL-16:       for.body.5:
-; UNROLL-16-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_4]]
+; UNROLL-16-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 5
 ; UNROLL-16-NEXT:    [[TMP5:%.*]] = load i3, i3* [[ARRAYIDX_5]], align 1
 ; UNROLL-16-NEXT:    [[ADD_5]] = add nsw i3 [[TMP5]], [[ADD_4]]
-; UNROLL-16-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
 ; UNROLL-16-NEXT:    [[EXITCOND_5:%.*]] = icmp eq i3 -2, [[N]]
 ; UNROLL-16-NEXT:    br i1 [[EXITCOND_5]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_6]]
 ; UNROLL-16:       for.body.6:
-; UNROLL-16-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_5]]
+; UNROLL-16-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 6
 ; UNROLL-16-NEXT:    [[TMP6:%.*]] = load i3, i3* [[ARRAYIDX_6]], align 1
 ; UNROLL-16-NEXT:    [[ADD_6]] = add nsw i3 [[TMP6]], [[ADD_5]]
-; UNROLL-16-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
-; UNROLL-16-NEXT:    [[EXITCOND_6:%.*]] = icmp eq i3 -1, [[N]]
-; UNROLL-16-NEXT:    br i1 [[EXITCOND_6]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_7]]
-; UNROLL-16:       for.body.7:
-; UNROLL-16-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_6]]
-; UNROLL-16-NEXT:    [[TMP7:%.*]] = load i3, i3* [[ARRAYIDX_7]], align 1
-; UNROLL-16-NEXT:    [[ADD_7]] = add nsw i3 [[TMP7]], [[ADD_6]]
-; UNROLL-16-NEXT:    [[INDVARS_IV_NEXT_7:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_6]], 1
-; UNROLL-16-NEXT:    br i1 false, label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_8]]
-; UNROLL-16:       for.body.8:
-; UNROLL-16-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_7]]
-; UNROLL-16-NEXT:    [[TMP8:%.*]] = load i3, i3* [[ARRAYIDX_8]], align 1
-; UNROLL-16-NEXT:    [[ADD_8]] = add nsw i3 [[TMP8]], [[ADD_7]]
-; UNROLL-16-NEXT:    [[INDVARS_IV_NEXT_8:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_7]], 1
-; UNROLL-16-NEXT:    [[EXITCOND_8:%.*]] = icmp eq i3 1, [[N]]
-; UNROLL-16-NEXT:    br i1 [[EXITCOND_8]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_9]]
-; UNROLL-16:       for.body.9:
-; UNROLL-16-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_8]]
-; UNROLL-16-NEXT:    [[TMP9:%.*]] = load i3, i3* [[ARRAYIDX_9]], align 1
-; UNROLL-16-NEXT:    [[ADD_9]] = add nsw i3 [[TMP9]], [[ADD_8]]
-; UNROLL-16-NEXT:    [[INDVARS_IV_NEXT_9:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_8]], 1
-; UNROLL-16-NEXT:    [[EXITCOND_9:%.*]] = icmp eq i3 2, [[N]]
-; UNROLL-16-NEXT:    br i1 [[EXITCOND_9]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_10]]
-; UNROLL-16:       for.body.10:
-; UNROLL-16-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_9]]
-; UNROLL-16-NEXT:    [[TMP10:%.*]] = load i3, i3* [[ARRAYIDX_10]], align 1
-; UNROLL-16-NEXT:    [[ADD_10]] = add nsw i3 [[TMP10]], [[ADD_9]]
-; UNROLL-16-NEXT:    [[INDVARS_IV_NEXT_10:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_9]], 1
-; UNROLL-16-NEXT:    [[EXITCOND_10:%.*]] = icmp eq i3 3, [[N]]
-; UNROLL-16-NEXT:    br i1 [[EXITCOND_10]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_11]]
-; UNROLL-16:       for.body.11:
-; UNROLL-16-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_10]]
-; UNROLL-16-NEXT:    [[TMP11:%.*]] = load i3, i3* [[ARRAYIDX_11]], align 1
-; UNROLL-16-NEXT:    [[ADD_11]] = add nsw i3 [[TMP11]], [[ADD_10]]
-; UNROLL-16-NEXT:    [[INDVARS_IV_NEXT_11:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_10]], 1
-; UNROLL-16-NEXT:    [[EXITCOND_11:%.*]] = icmp eq i3 -4, [[N]]
-; UNROLL-16-NEXT:    br i1 [[EXITCOND_11]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_12]]
-; UNROLL-16:       for.body.12:
-; UNROLL-16-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_11]]
-; UNROLL-16-NEXT:    [[TMP12:%.*]] = load i3, i3* [[ARRAYIDX_12]], align 1
-; UNROLL-16-NEXT:    [[ADD_12]] = add nsw i3 [[TMP12]], [[ADD_11]]
-; UNROLL-16-NEXT:    [[INDVARS_IV_NEXT_12:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_11]], 1
-; UNROLL-16-NEXT:    [[EXITCOND_12:%.*]] = icmp eq i3 -3, [[N]]
-; UNROLL-16-NEXT:    br i1 [[EXITCOND_12]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_13]]
-; UNROLL-16:       for.body.13:
-; UNROLL-16-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_12]]
-; UNROLL-16-NEXT:    [[TMP13:%.*]] = load i3, i3* [[ARRAYIDX_13]], align 1
-; UNROLL-16-NEXT:    [[ADD_13]] = add nsw i3 [[TMP13]], [[ADD_12]]
-; UNROLL-16-NEXT:    [[INDVARS_IV_NEXT_13:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_12]], 1
-; UNROLL-16-NEXT:    [[EXITCOND_13:%.*]] = icmp eq i3 -2, [[N]]
-; UNROLL-16-NEXT:    br i1 [[EXITCOND_13]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_14]]
-; UNROLL-16:       for.body.14:
-; UNROLL-16-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_13]]
-; UNROLL-16-NEXT:    [[TMP14:%.*]] = load i3, i3* [[ARRAYIDX_14]], align 1
-; UNROLL-16-NEXT:    [[ADD_14]] = add nsw i3 [[TMP14]], [[ADD_13]]
-; UNROLL-16-NEXT:    [[INDVARS_IV_NEXT_14:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_13]], 1
-; UNROLL-16-NEXT:    [[EXITCOND_14:%.*]] = icmp eq i3 -1, [[N]]
-; UNROLL-16-NEXT:    br i1 [[EXITCOND_14]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY_15]]
-; UNROLL-16:       for.body.15:
-; UNROLL-16-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds i3, i3* [[A]], i64 [[INDVARS_IV_NEXT_14]]
-; UNROLL-16-NEXT:    [[TMP15:%.*]] = load i3, i3* [[ARRAYIDX_15]], align 1
-; UNROLL-16-NEXT:    [[ADD_15]] = add nsw i3 [[TMP15]], [[ADD_14]]
-; UNROLL-16-NEXT:    [[INDVARS_IV_NEXT_15]] = add i64 [[INDVARS_IV_NEXT_14]], 1
-; UNROLL-16-NEXT:    br i1 false, label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; UNROLL-16-NEXT:    br label [[FOR_END_LOOPEXIT]]
 ;
 ; UNROLL-4-LABEL: @test(
 ; UNROLL-4-NEXT:  entry:

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll b/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll
index a16d567a36ce..02e86de97e99 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll
@@ -100,67 +100,45 @@ define dso_local void @hoge_5(i8 %arg) {
 ; UPPER-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X]], 17
 ; UPPER-NEXT:    br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
 ; UPPER:       loop.preheader:
-; UPPER-NEXT:    [[TMP1:%.*]] = sub i32 16, [[X]]
-; UPPER-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 2
-; UPPER-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
-; UPPER-NEXT:    [[TMP4:%.*]] = urem i32 [[TMP2]], 6
-; UPPER-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], 1
-; UPPER-NEXT:    [[XTRAITER:%.*]] = urem i32 [[TMP5]], 6
-; UPPER-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; UPPER-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_PROL_PREHEADER:%.*]], label [[LOOP_PROL_LOOPEXIT:%.*]]
-; UPPER:       loop.prol.preheader:
-; UPPER-NEXT:    br label [[LOOP_PROL:%.*]]
-; UPPER:       loop.prol:
-; UPPER-NEXT:    [[IV_PROL:%.*]] = phi i32 [ [[IV_NEXT_PROL:%.*]], [[LOOP_PROL]] ], [ [[X]], [[LOOP_PROL_PREHEADER]] ]
-; UPPER-NEXT:    [[PTR_PROL:%.*]] = phi i8* [ [[PTR_NEXT_PROL:%.*]], [[LOOP_PROL]] ], [ [[Y]], [[LOOP_PROL_PREHEADER]] ]
-; UPPER-NEXT:    [[PROL_ITER:%.*]] = phi i32 [ [[XTRAITER]], [[LOOP_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[LOOP_PROL]] ]
-; UPPER-NEXT:    [[IV_NEXT_PROL]] = add nuw i32 [[IV_PROL]], 4
-; UPPER-NEXT:    [[PTR_NEXT_PROL]] = getelementptr inbounds i8, i8* [[PTR_PROL]], i32 1
-; UPPER-NEXT:    store i8 [[ARG:%.*]], i8* [[PTR_NEXT_PROL]], align 1
-; UPPER-NEXT:    [[TMP6:%.*]] = icmp ult i32 [[IV_NEXT_PROL]], 17
-; UPPER-NEXT:    [[PROL_ITER_SUB]] = sub i32 [[PROL_ITER]], 1
-; UPPER-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_SUB]], 0
-; UPPER-NEXT:    br i1 [[PROL_ITER_CMP]], label [[LOOP_PROL]], label [[LOOP_PROL_LOOPEXIT_UNR_LCSSA:%.*]], [[LOOP0:!llvm.loop !.*]]
-; UPPER:       loop.prol.loopexit.unr-lcssa:
-; UPPER-NEXT:    [[IV_UNR_PH:%.*]] = phi i32 [ [[IV_NEXT_PROL]], [[LOOP_PROL]] ]
-; UPPER-NEXT:    [[PTR_UNR_PH:%.*]] = phi i8* [ [[PTR_NEXT_PROL]], [[LOOP_PROL]] ]
-; UPPER-NEXT:    br label [[LOOP_PROL_LOOPEXIT]]
-; UPPER:       loop.prol.loopexit:
-; UPPER-NEXT:    [[IV_UNR:%.*]] = phi i32 [ [[X]], [[LOOP_PREHEADER]] ], [ [[IV_UNR_PH]], [[LOOP_PROL_LOOPEXIT_UNR_LCSSA]] ]
-; UPPER-NEXT:    [[PTR_UNR:%.*]] = phi i8* [ [[Y]], [[LOOP_PREHEADER]] ], [ [[PTR_UNR_PH]], [[LOOP_PROL_LOOPEXIT_UNR_LCSSA]] ]
-; UPPER-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[TMP2]], 5
-; UPPER-NEXT:    br i1 [[TMP7]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP_PREHEADER_NEW:%.*]]
-; UPPER:       loop.preheader.new:
 ; UPPER-NEXT:    br label [[LOOP:%.*]]
 ; UPPER:       loop:
-; UPPER-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_UNR]], [[LOOP_PREHEADER_NEW]] ], [ [[IV_NEXT_5:%.*]], [[LOOP]] ]
-; UPPER-NEXT:    [[PTR:%.*]] = phi i8* [ [[PTR_UNR]], [[LOOP_PREHEADER_NEW]] ], [ [[PTR_NEXT_5:%.*]], [[LOOP]] ]
-; UPPER-NEXT:    [[IV_NEXT:%.*]] = add nuw i32 [[IV]], 4
-; UPPER-NEXT:    [[PTR_NEXT:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i32 1
-; UPPER-NEXT:    store i8 [[ARG]], i8* [[PTR_NEXT]], align 1
+; UPPER-NEXT:    [[IV_NEXT:%.*]] = add nuw i32 [[X]], 4
+; UPPER-NEXT:    [[PTR_NEXT:%.*]] = getelementptr inbounds i8, i8* [[Y]], i32 1
+; UPPER-NEXT:    store i8 [[ARG:%.*]], i8* [[PTR_NEXT]], align 1
+; UPPER-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[IV_NEXT]], 17
+; UPPER-NEXT:    br i1 [[TMP1]], label [[LOOP_1:%.*]], label [[EXIT_LOOPEXIT:%.*]]
+; UPPER:       exit.loopexit:
+; UPPER-NEXT:    br label [[EXIT]]
+; UPPER:       exit:
+; UPPER-NEXT:    ret void
+; UPPER:       loop.1:
 ; UPPER-NEXT:    [[IV_NEXT_1:%.*]] = add nuw i32 [[IV_NEXT]], 4
 ; UPPER-NEXT:    [[PTR_NEXT_1:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT]], i32 1
 ; UPPER-NEXT:    store i8 [[ARG]], i8* [[PTR_NEXT_1]], align 1
+; UPPER-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[IV_NEXT_1]], 17
+; UPPER-NEXT:    br i1 [[TMP2]], label [[LOOP_2:%.*]], label [[EXIT_LOOPEXIT]]
+; UPPER:       loop.2:
 ; UPPER-NEXT:    [[IV_NEXT_2:%.*]] = add nuw i32 [[IV_NEXT_1]], 4
 ; UPPER-NEXT:    [[PTR_NEXT_2:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_1]], i32 1
 ; UPPER-NEXT:    store i8 [[ARG]], i8* [[PTR_NEXT_2]], align 1
+; UPPER-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[IV_NEXT_2]], 17
+; UPPER-NEXT:    br i1 [[TMP3]], label [[LOOP_3:%.*]], label [[EXIT_LOOPEXIT]]
+; UPPER:       loop.3:
 ; UPPER-NEXT:    [[IV_NEXT_3:%.*]] = add nuw i32 [[IV_NEXT_2]], 4
 ; UPPER-NEXT:    [[PTR_NEXT_3:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_2]], i32 1
 ; UPPER-NEXT:    store i8 [[ARG]], i8* [[PTR_NEXT_3]], align 1
+; UPPER-NEXT:    [[TMP4:%.*]] = icmp ult i32 [[IV_NEXT_3]], 17
+; UPPER-NEXT:    br i1 [[TMP4]], label [[LOOP_4:%.*]], label [[EXIT_LOOPEXIT]]
+; UPPER:       loop.4:
 ; UPPER-NEXT:    [[IV_NEXT_4:%.*]] = add nuw i32 [[IV_NEXT_3]], 4
 ; UPPER-NEXT:    [[PTR_NEXT_4:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_3]], i32 1
 ; UPPER-NEXT:    store i8 [[ARG]], i8* [[PTR_NEXT_4]], align 1
-; UPPER-NEXT:    [[IV_NEXT_5]] = add nuw i32 [[IV_NEXT_4]], 4
-; UPPER-NEXT:    [[PTR_NEXT_5]] = getelementptr inbounds i8, i8* [[PTR_NEXT_4]], i32 1
+; UPPER-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[IV_NEXT_4]], 17
+; UPPER-NEXT:    br i1 [[TMP5]], label [[LOOP_5:%.*]], label [[EXIT_LOOPEXIT]]
+; UPPER:       loop.5:
+; UPPER-NEXT:    [[PTR_NEXT_5:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_4]], i32 1
 ; UPPER-NEXT:    store i8 [[ARG]], i8* [[PTR_NEXT_5]], align 1
-; UPPER-NEXT:    [[TMP8:%.*]] = icmp ult i32 [[IV_NEXT_5]], 17
-; UPPER-NEXT:    br i1 [[TMP8]], label [[LOOP]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]]
-; UPPER:       exit.loopexit.unr-lcssa:
 ; UPPER-NEXT:    br label [[EXIT_LOOPEXIT]]
-; UPPER:       exit.loopexit:
-; UPPER-NEXT:    br label [[EXIT]]
-; UPPER:       exit:
-; UPPER-NEXT:    ret void
 ;
 entry:
   %x = load i32, i32* @global, align 4


        


More information about the llvm-commits mailing list