[llvm] r284053 - Reapply "[LoopUnroll] Use the upper bound of the loop trip count to fullly unroll a loop"

Haicheng Wu via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 12 14:29:38 PDT 2016


Author: haicheng
Date: Wed Oct 12 16:29:38 2016
New Revision: 284053

URL: http://llvm.org/viewvc/llvm-project?rev=284053&view=rev
Log:
Reapply "[LoopUnroll] Use the upper bound of the loop trip count to fullly unroll a loop"

Reappy r284044 after revert in r284051. Krzysztof fixed the error in r284049.

The original summary:

This patch tries to fully unroll loops having break statement like this

for (int i = 0; i < 8; i++) {
    if (a[i] == value) {
        found = true;
        break;
    }
}

GCC can fully unroll such loops, but currently LLVM cannot because LLVM only
supports loops having exact constant trip counts.

The upper bound of the trip count can be obtained from calling
ScalarEvolution::getMaxBackedgeTakenCount(). Part of the patch is the
refactoring work in SCEV to prevent duplicating code.

The feature of using the upper bound is enabled under the same circumstance
when runtime unrolling is enabled since both are used to unroll loops without
knowing the exact constant trip count.

Added:
    llvm/trunk/test/Transforms/LoopUnroll/AArch64/full-unroll-trip-count-upper-bound.ll
Modified:
    llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
    llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
    llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
    llvm/trunk/include/llvm/Transforms/Scalar.h
    llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h
    llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h
    llvm/trunk/lib/Analysis/ScalarEvolution.cpp
    llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
    llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp

Modified: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ScalarEvolution.h?rev=284053&r1=284052&r2=284053&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h (original)
+++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h Wed Oct 12 16:29:38 2016
@@ -1330,6 +1330,11 @@ public:
   /// prematurely via another branch.
   unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock);
 
+  /// Returns the upper bound of the loop trip count as a normal unsigned
+  /// value.
+  /// Returns 0 if the trip count is unknown or not constant.
+  unsigned getSmallConstantMaxTripCount(Loop *L);
+
   /// Returns the largest constant divisor of the trip count of the
   /// loop if it is a single-exit loop and we can compute a small maximum for
   /// that loop.

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=284053&r1=284052&r2=284053&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Wed Oct 12 16:29:38 2016
@@ -290,6 +290,8 @@ public:
     /// Apply loop unroll on any kind of loop
     /// (mainly to loops that fail runtime unrolling).
     bool Force;
+    /// Allow using trip count upper bound to unroll loops.
+    bool UpperBound;
   };
 
   /// \brief Get target-customized preferences for the generic loop unrolling

Modified: llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h?rev=284053&r1=284052&r2=284053&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h (original)
+++ llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h Wed Oct 12 16:29:38 2016
@@ -284,7 +284,8 @@ public:
     }
 
     // Enable runtime and partial unrolling up to the specified size.
-    UP.Partial = UP.Runtime = true;
+    // Enable using trip count upper bound to unroll loops.
+    UP.Partial = UP.Runtime = UP.UpperBound = true;
     UP.PartialThreshold = MaxOps;
 
     // Avoid unrolling when optimizing for size.

Modified: llvm/trunk/include/llvm/Transforms/Scalar.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar.h?rev=284053&r1=284052&r2=284053&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Scalar.h (original)
+++ llvm/trunk/include/llvm/Transforms/Scalar.h Wed Oct 12 16:29:38 2016
@@ -169,8 +169,9 @@ Pass *createLoopInstSimplifyPass();
 // LoopUnroll - This pass is a simple loop unrolling pass.
 //
 Pass *createLoopUnrollPass(int Threshold = -1, int Count = -1,
-                           int AllowPartial = -1, int Runtime = -1);
-// Create an unrolling pass for full unrolling only.
+                           int AllowPartial = -1, int Runtime = -1,
+                           int UpperBound = -1);
+// Create an unrolling pass for full unrolling that uses exact trip count only.
 Pass *createSimpleLoopUnrollPass();
 
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h?rev=284053&r1=284052&r2=284053&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h (original)
+++ llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h Wed Oct 12 16:29:38 2016
@@ -21,6 +21,7 @@ struct LoopUnrollPass : public PassInfoM
   Optional<unsigned> ProvidedThreshold;
   Optional<bool> ProvidedAllowPartial;
   Optional<bool> ProvidedRuntime;
+  Optional<bool> ProvidedUpperBound;
 
   PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM);
 };

Modified: llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h?rev=284053&r1=284052&r2=284053&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h Wed Oct 12 16:29:38 2016
@@ -32,8 +32,8 @@ class ScalarEvolution;
 
 bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
                 bool AllowRuntime, bool AllowExpensiveTripCount,
-                unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
-                DominatorTree *DT, AssumptionCache *AC,
+                bool UseUpperBound, unsigned TripMultiple, LoopInfo *LI,
+                ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
                 OptimizationRemarkEmitter *ORE, bool PreserveLCSSA);
 
 bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,

Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolution.cpp?rev=284053&r1=284052&r2=284053&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ScalarEvolution.cpp (original)
+++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp Wed Oct 12 16:29:38 2016
@@ -5292,6 +5292,20 @@ const SCEV *ScalarEvolution::createSCEV(
 //                   Iteration Count Computation Code
 //
 
+static unsigned getConstantTripCount(const SCEVConstant *ExitCount) {
+  if (!ExitCount)
+    return 0;
+
+  ConstantInt *ExitConst = ExitCount->getValue();
+
+  // Guard against huge trip counts.
+  if (ExitConst->getValue().getActiveBits() > 32)
+    return 0;
+
+  // In case of integer overflow, this returns 0, which is correct.
+  return ((unsigned)ExitConst->getZExtValue()) + 1;
+}
+
 unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
   if (BasicBlock *ExitingBB = L->getExitingBlock())
     return getSmallConstantTripCount(L, ExitingBB);
@@ -5307,17 +5321,13 @@ unsigned ScalarEvolution::getSmallConsta
          "Exiting block must actually branch out of the loop!");
   const SCEVConstant *ExitCount =
       dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
-  if (!ExitCount)
-    return 0;
-
-  ConstantInt *ExitConst = ExitCount->getValue();
-
-  // Guard against huge trip counts.
-  if (ExitConst->getValue().getActiveBits() > 32)
-    return 0;
+  return getConstantTripCount(ExitCount);
+}
 
-  // In case of integer overflow, this returns 0, which is correct.
-  return ((unsigned)ExitConst->getZExtValue()) + 1;
+unsigned ScalarEvolution::getSmallConstantMaxTripCount(Loop *L) {
+  const auto *MaxExitCount =
+      dyn_cast<SCEVConstant>(getMaxBackedgeTakenCount(L));
+  return getConstantTripCount(MaxExitCount);
 }
 
 unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {

Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=284053&r1=284052&r2=284053&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp Wed Oct 12 16:29:38 2016
@@ -92,6 +92,11 @@ static cl::opt<bool>
     UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
                   cl::desc("Unroll loops with run-time trip counts"));
 
+static cl::opt<unsigned> UnrollMaxUpperBound(
+    "unroll-max-upperbound", cl::init(8), cl::Hidden,
+    cl::desc(
+        "The max of trip count upper bound that is considered in unrolling"));
+
 static cl::opt<unsigned> PragmaUnrollThreshold(
     "pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
     cl::desc("Unrolled size limit for loops with an unroll(full) or "
@@ -107,7 +112,7 @@ static const unsigned NoThreshold = UINT
 static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
     Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
     Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
-    Optional<bool> UserRuntime) {
+    Optional<bool> UserRuntime, Optional<bool> UserUpperBound) {
   TargetTransformInfo::UnrollingPreferences UP;
 
   // Set up the defaults
@@ -126,6 +131,7 @@ static TargetTransformInfo::UnrollingPre
   UP.AllowRemainder = true;
   UP.AllowExpensiveTripCount = false;
   UP.Force = false;
+  UP.UpperBound = false;
 
   // Override with any target specific settings
   TTI.getUnrollingPreferences(L, UP);
@@ -156,6 +162,8 @@ static TargetTransformInfo::UnrollingPre
     UP.AllowRemainder = UnrollAllowRemainder;
   if (UnrollRuntime.getNumOccurrences() > 0)
     UP.Runtime = UnrollRuntime;
+  if (UnrollMaxUpperBound == 0)
+    UP.UpperBound = false;
 
   // Apply user values provided by argument
   if (UserThreshold.hasValue()) {
@@ -168,6 +176,8 @@ static TargetTransformInfo::UnrollingPre
     UP.Partial = *UserAllowPartial;
   if (UserRuntime.hasValue())
     UP.Runtime = *UserRuntime;
+  if (UserUpperBound.hasValue())
+    UP.UpperBound = *UserUpperBound;
 
   return UP;
 }
@@ -691,13 +701,11 @@ static bool canUnrollCompletely(Loop *L,
 
 // Returns true if unroll count was set explicitly.
 // Calculates unroll count and writes it to UP.Count.
-static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
-                               DominatorTree &DT, LoopInfo *LI,
-                               ScalarEvolution *SE,
-                               OptimizationRemarkEmitter *ORE,
-                               unsigned TripCount, unsigned TripMultiple,
-                               unsigned LoopSize,
-                               TargetTransformInfo::UnrollingPreferences &UP) {
+static bool computeUnrollCount(
+    Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
+    ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount,
+    unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize,
+    TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
   // BEInsns represents number of instructions optimized when "back edge"
   // becomes "fall through" in unrolled loop.
   // For now we count a conditional branch on a backedge and a comparison
@@ -749,14 +757,27 @@ static bool computeUnrollCount(Loop *L,
   }
 
   // 3rd priority is full unroll count.
-  // Full unroll make sense only when TripCount could be staticaly calculated.
+  // Full unroll makes sense only when TripCount or its upper bound could be
+  // statically calculated.
   // Also we need to check if we exceed FullUnrollMaxCount.
-  if (TripCount && TripCount <= UP.FullUnrollMaxCount) {
+  // If using the upper bound to unroll, TripMultiple should be set to 1 because
+  // we do not know when loop may exit.
+  // MaxTripCount and ExactTripCount cannot both be non zero since we only
+  // compute the former when the latter is zero.
+  unsigned ExactTripCount = TripCount;
+  assert((ExactTripCount == 0 || MaxTripCount == 0) &&
+         "ExtractTripCound and MaxTripCount cannot both be non zero.");
+  unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
+  if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
     // When computing the unrolled size, note that BEInsns are not replicated
     // like the rest of the loop body.
-    UnrolledSize = (uint64_t)(LoopSize - BEInsns) * TripCount + BEInsns;
+    UnrolledSize =
+        (uint64_t)(LoopSize - BEInsns) * FullUnrollTripCount + BEInsns;
     if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount,
                             UnrolledSize, UnrolledSize)) {
+      UseUpperBound = (MaxTripCount == FullUnrollTripCount);
+      TripCount = FullUnrollTripCount;
+      TripMultiple = UP.UpperBound ? 1 : TripMultiple;
       UP.Count = TripCount;
       return ExplicitUnroll;
     } else {
@@ -764,12 +785,15 @@ static bool computeUnrollCount(Loop *L,
       // helps to remove a significant number of instructions.
       // To check that, run additional analysis on the loop.
       if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
-              L, TripCount, DT, *SE, TTI,
+              L, FullUnrollTripCount, DT, *SE, TTI,
               UP.Threshold + UP.DynamicCostSavingsDiscount))
         if (canUnrollCompletely(L, UP.Threshold,
                                 UP.PercentDynamicCostSavedThreshold,
                                 UP.DynamicCostSavingsDiscount,
                                 Cost->UnrolledCost, Cost->RolledDynamicCost)) {
+          UseUpperBound = (MaxTripCount == FullUnrollTripCount);
+          TripCount = FullUnrollTripCount;
+          TripMultiple = UP.UpperBound ? 1 : TripMultiple;
           UP.Count = TripCount;
           return ExplicitUnroll;
         }
@@ -909,7 +933,8 @@ static bool tryToUnrollLoop(Loop *L, Dom
                             Optional<unsigned> ProvidedCount,
                             Optional<unsigned> ProvidedThreshold,
                             Optional<bool> ProvidedAllowPartial,
-                            Optional<bool> ProvidedRuntime) {
+                            Optional<bool> ProvidedRuntime,
+                            Optional<bool> ProvidedUpperBound) {
   DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName()
                << "] Loop %" << L->getHeader()->getName() << "\n");
   if (HasUnrollDisablePragma(L)) {
@@ -939,6 +964,7 @@ static bool tryToUnrollLoop(Loop *L, Dom
 
   // Find trip count and trip multiple if count is not available
   unsigned TripCount = 0;
+  unsigned MaxTripCount = 0;
   unsigned TripMultiple = 1;
   // If there are multiple exiting blocks but one of them is the latch, use the
   // latch for the trip count estimation. Otherwise insist on a single exiting
@@ -953,7 +979,7 @@ static bool tryToUnrollLoop(Loop *L, Dom
 
   TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
       L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
-      ProvidedRuntime);
+      ProvidedRuntime, ProvidedUpperBound);
 
   // Exit early if unrolling is disabled.
   if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
@@ -974,8 +1000,23 @@ static bool tryToUnrollLoop(Loop *L, Dom
   if (Convergent)
     UP.AllowRemainder = false;
 
-  bool IsCountSetExplicitly = computeUnrollCount(
-      L, TTI, DT, LI, SE, &ORE, TripCount, TripMultiple, LoopSize, UP);
+  // Try to find the trip count upper bound if it is allowed and we cannot find
+  // exact trip count.
+  if (UP.UpperBound) {
+    if (!TripCount) {
+      MaxTripCount = SE->getSmallConstantMaxTripCount(L);
+      // Only unroll with small upper bound.
+      if (MaxTripCount > UnrollMaxUpperBound)
+        MaxTripCount = 0;
+    }
+  }
+
+  // computeUnrollCount() decides whether it is beneficial to use upper bound to
+  // fully unroll the loop.
+  bool UseUpperBound = false;
+  bool IsCountSetExplicitly =
+      computeUnrollCount(L, TTI, DT, LI, SE, &ORE, TripCount, MaxTripCount,
+                         TripMultiple, LoopSize, UP, UseUpperBound);
   if (!UP.Count)
     return false;
   // Unroll factor (Count) must be less or equal to TripCount.
@@ -984,8 +1025,8 @@ static bool tryToUnrollLoop(Loop *L, Dom
 
   // Unroll the loop.
   if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
-                  UP.AllowExpensiveTripCount, TripMultiple, LI, SE, &DT, &AC,
-                  &ORE, PreserveLCSSA))
+                  UP.AllowExpensiveTripCount, UseUpperBound, TripMultiple, LI,
+                  SE, &DT, &AC, &ORE, PreserveLCSSA))
     return false;
 
   // If loop has an unroll count pragma or unrolled by explicitly set count
@@ -1001,10 +1042,11 @@ public:
   static char ID; // Pass ID, replacement for typeid
   LoopUnroll(Optional<unsigned> Threshold = None,
              Optional<unsigned> Count = None,
-             Optional<bool> AllowPartial = None, Optional<bool> Runtime = None)
+             Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
+             Optional<bool> UpperBound = None)
       : LoopPass(ID), ProvidedCount(std::move(Count)),
         ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
-        ProvidedRuntime(Runtime) {
+        ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound) {
     initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
   }
 
@@ -1012,6 +1054,7 @@ public:
   Optional<unsigned> ProvidedThreshold;
   Optional<bool> ProvidedAllowPartial;
   Optional<bool> ProvidedRuntime;
+  Optional<bool> ProvidedUpperBound;
 
   bool runOnLoop(Loop *L, LPPassManager &) override {
     if (skipLoop(L))
@@ -1033,7 +1076,8 @@ public:
 
     return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA,
                            ProvidedCount, ProvidedThreshold,
-                           ProvidedAllowPartial, ProvidedRuntime);
+                           ProvidedAllowPartial, ProvidedRuntime,
+                           ProvidedUpperBound);
   }
 
   /// This transformation requires natural loop information & requires that
@@ -1057,7 +1101,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransfo
 INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
 
 Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
-                                 int Runtime) {
+                                 int Runtime, int UpperBound) {
   // TODO: It would make more sense for this function to take the optionals
   // directly, but that's dangerous since it would silently break out of tree
   // callers.
@@ -1065,11 +1109,12 @@ Pass *llvm::createLoopUnrollPass(int Thr
                         Count == -1 ? None : Optional<unsigned>(Count),
                         AllowPartial == -1 ? None
                                            : Optional<bool>(AllowPartial),
-                        Runtime == -1 ? None : Optional<bool>(Runtime));
+                        Runtime == -1 ? None : Optional<bool>(Runtime),
+                        UpperBound == -1 ? None : Optional<bool>(UpperBound));
 }
 
 Pass *llvm::createSimpleLoopUnrollPass() {
-  return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+  return llvm::createLoopUnrollPass(-1, -1, 0, 0, 0);
 }
 
 PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM) {
@@ -1098,9 +1143,10 @@ PreservedAnalyses LoopUnrollPass::run(Lo
     report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not "
                        "cached at a higher level");
 
-  bool Changed = tryToUnrollLoop(
-      &L, *DT, LI, SE, *TTI, *AC, *ORE, /*PreserveLCSSA*/ true, ProvidedCount,
-      ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime);
+  bool Changed =
+      tryToUnrollLoop(&L, *DT, LI, SE, *TTI, *AC, *ORE, /*PreserveLCSSA*/ true,
+                      ProvidedCount, ProvidedThreshold, ProvidedAllowPartial,
+                      ProvidedRuntime, ProvidedUpperBound);
 
   if (!Changed)
     return PreservedAnalyses::all();

Modified: llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp?rev=284053&r1=284052&r2=284053&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp Wed Oct 12 16:29:38 2016
@@ -187,6 +187,10 @@ static bool needToInsertPhisForLCSSA(Loo
 /// iterations via an early branch, but control may not exit the loop from the
 /// LatchBlock's terminator prior to TripCount iterations.
 ///
+/// PreserveCondBr indicates whether the conditional branch of the LatchBlock
+/// needs to be preserved.  It is needed when we use trip count upper bound to
+/// fully unroll the loop.
+///
 /// Similarly, TripMultiple divides the number of times that the LatchBlock may
 /// execute without exiting the loop.
 ///
@@ -203,9 +207,10 @@ static bool needToInsertPhisForLCSSA(Loo
 /// DominatorTree if they are non-null.
 bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
                       bool AllowRuntime, bool AllowExpensiveTripCount,
-                      unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
-                      DominatorTree *DT, AssumptionCache *AC,
-                      OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) {
+                      bool PreserveCondBr, unsigned TripMultiple, LoopInfo *LI,
+                      ScalarEvolution *SE, DominatorTree *DT,
+                      AssumptionCache *AC, OptimizationRemarkEmitter *ORE,
+                      bool PreserveLCSSA) {
   BasicBlock *Preheader = L->getLoopPreheader();
   if (!Preheader) {
     DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
@@ -539,12 +544,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned
     if (CompletelyUnroll) {
       if (j == 0)
         Dest = LoopExit;
-      NeedConditional = false;
-    }
-
-    // If we know the trip count or a multiple of it, we can safely use an
-    // unconditional branch for some iterations.
-    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
+      // If using trip count upper bound to completely unroll, we need to keep
+      // the conditional branch except the last one because the loop may exit
+      // after any iteration.
+      assert(NeedConditional &&
+             "NeedCondition cannot be modified by both complete "
+             "unrolling and runtime unrolling");
+      NeedConditional = (PreserveCondBr && j);
+    } else if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
+      // If we know the trip count or a multiple of it, we can safely use an
+      // unconditional branch for some iterations.
       NeedConditional = false;
     }
 

Added: llvm/trunk/test/Transforms/LoopUnroll/AArch64/full-unroll-trip-count-upper-bound.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/AArch64/full-unroll-trip-count-upper-bound.ll?rev=284053&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/AArch64/full-unroll-trip-count-upper-bound.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/AArch64/full-unroll-trip-count-upper-bound.ll Wed Oct 12 16:29:38 2016
@@ -0,0 +1,43 @@
+; RUN: opt -loop-unroll -S -mtriple aarch64 -mcpu=cortex-a57 %s | FileCheck %s -check-prefix=UNROLL
+; RUN: opt -loop-unroll -unroll-max-upperbound=0 -S -mtriple aarch64 -mcpu=cortex-a57 %s | FileCheck %s -check-prefix=NOUNROLL
+
+; This IR comes from this C code:
+;
+;   for (int i = 0; i < 4; i++) {
+;     if (src[i] == 1) {
+;       *dst = i;
+;       break;
+;     }
+;   }
+;
+; This test is meant to check that this loop is unrolled into four iterations.
+
+; UNROLL-LABEL: @test
+; UNROLL: load i32, i32*
+; UNROLL: load i32, i32*
+; UNROLL: load i32, i32*
+; UNROLL: load i32, i32*
+; UNROLL-NOT: load i32, i32*
+; NOUNROLL-LABEL: @test
+; NOUNROLL: load i32, i32*
+; NOUNROLL-NOT: load i32, i32*
+
+define void @test(i32* %dst, i32* %src) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %0 = sext i32 %i to i64
+  %1 = getelementptr inbounds i32, i32* %src, i64 %0
+  %2 = load i32, i32* %1
+  %inc = add nsw i32 %i, 1
+  %cmp1 = icmp slt i32 %inc, 4
+  %cmp3 = icmp eq i32 %2, 1 
+  %or.cond = and i1 %cmp3, %cmp1
+  br i1 %or.cond, label %for.body, label %exit
+
+exit:                                          ; preds = %for.body
+  store i32 %i, i32* %dst
+  ret void
+}




More information about the llvm-commits mailing list