[llvm] r271071 - The patch refactors unroll pass.

Evgeny Stupachenko via llvm-commits llvm-commits at lists.llvm.org
Fri May 27 16:15:06 PDT 2016


Author: evstupac
Date: Fri May 27 18:15:06 2016
New Revision: 271071

URL: http://llvm.org/viewvc/llvm-project?rev=271071&view=rev
Log:
The patch refactors unroll pass.
Summary:
Unroll factor (Count) calculations moved to a new function.
Early exits on pragma and "-unroll-count" defined factor added.
New type of unrolling "Force" introduced (previously used implicitly).
New unroll preference "AllowRemainder" introduced and set "true" by default.
(should be set to false for architectures that suffers from it).

Reviewers: hfinkel, mzolotukhin, zzheng

Differential Revision: http://reviews.llvm.org/D19553

From: Evgeny Stupachenko <evstupac at gmail.com>

Modified:
    llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
    llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h
    llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
    llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp
    llvm/trunk/test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=271071&r1=271070&r2=271071&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Fri May 27 18:15:06 2016
@@ -280,9 +280,14 @@ public:
     /// loop body even when the number of loop iterations is not known at
     /// compile time).
     bool Runtime;
+    /// Allow generation of a loop remainder (extra iterations after unroll).
+    bool AllowRemainder;
     /// Allow emitting expensive instructions (such as divisions) when computing
     /// the trip count of a loop for runtime unrolling.
     bool AllowExpensiveTripCount;
+    /// Apply loop unroll on any kind of loop
+    /// (mainly to loops that fail runtime unrolling).
+    bool Force;
   };
 
   /// \brief Get target-customized preferences for the generic loop unrolling

Modified: llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h?rev=271071&r1=271070&r2=271071&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h Fri May 27 18:15:06 2016
@@ -29,10 +29,10 @@ class MDNode;
 class Pass;
 class ScalarEvolution;
 
-bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime,
-                bool AllowExpensiveTripCount, unsigned TripMultiple,
-                LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
-                AssumptionCache *AC, bool PreserveLCSSA);
+bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
+                bool AllowRuntime, bool AllowExpensiveTripCount,
+                unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
+                DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA);
 
 bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                 bool AllowExpensiveTripCount,

Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=271071&r1=271070&r2=271071&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp Fri May 27 18:15:06 2016
@@ -81,6 +81,11 @@ static cl::opt<bool>
                        cl::desc("Allows loops to be partially unrolled until "
                                 "-unroll-threshold loop size is reached."));
 
+static cl::opt<bool> UnrollAllowRemainder(
+    "unroll-allow-remainder", cl::Hidden,
+    cl::desc("Allow generation of a loop remainder (extra iterations) "
+             "when unrolling a loop."));
+
 static cl::opt<bool>
     UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
                   cl::desc("Unroll loops with run-time trip counts"));
@@ -100,12 +105,11 @@ static const unsigned NoThreshold = UINT
 static const unsigned DefaultUnrollRuntimeCount = 8;
 
 /// Gather the various unrolling parameters based on the defaults, compiler
-/// flags, TTI overrides, pragmas, and user specified parameters.
+/// flags, TTI overrides and user specified parameters.
 static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
     Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
     Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
-    Optional<bool> UserRuntime, unsigned PragmaCount, bool PragmaFullUnroll,
-    bool PragmaEnableUnroll, unsigned TripCount) {
+    Optional<bool> UserRuntime) {
   TargetTransformInfo::UnrollingPreferences UP;
 
   // Set up the defaults
@@ -120,7 +124,9 @@ static TargetTransformInfo::UnrollingPre
   UP.FullUnrollMaxCount = UINT_MAX;
   UP.Partial = false;
   UP.Runtime = false;
+  UP.AllowRemainder = true;
   UP.AllowExpensiveTripCount = false;
+  UP.Force = false;
 
   // Override with any target specific settings
   TTI.getUnrollingPreferences(L, UP);
@@ -131,12 +137,6 @@ static TargetTransformInfo::UnrollingPre
     UP.PartialThreshold = UP.PartialOptSizeThreshold;
   }
 
-  // Apply unroll count pragmas
-  if (PragmaCount)
-    UP.Count = PragmaCount;
-  else if (PragmaFullUnroll)
-    UP.Count = TripCount;
-
   // Apply any user values specified by cl::opt
   if (UnrollThreshold.getNumOccurrences() > 0) {
     UP.Threshold = UnrollThreshold;
@@ -147,14 +147,14 @@ static TargetTransformInfo::UnrollingPre
         UnrollPercentDynamicCostSavedThreshold;
   if (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0)
     UP.DynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
-  if (UnrollCount.getNumOccurrences() > 0)
-    UP.Count = UnrollCount;
   if (UnrollMaxCount.getNumOccurrences() > 0)
     UP.MaxCount = UnrollMaxCount;
   if (UnrollFullMaxCount.getNumOccurrences() > 0)
     UP.FullUnrollMaxCount = UnrollFullMaxCount;
   if (UnrollAllowPartial.getNumOccurrences() > 0)
     UP.Partial = UnrollAllowPartial;
+  if (UnrollAllowRemainder.getNumOccurrences() > 0)
+    UP.AllowRemainder = UnrollAllowRemainder;
   if (UnrollRuntime.getNumOccurrences() > 0)
     UP.Runtime = UnrollRuntime;
 
@@ -170,18 +170,6 @@ static TargetTransformInfo::UnrollingPre
   if (UserRuntime.hasValue())
     UP.Runtime = *UserRuntime;
 
-  if (PragmaCount > 0 ||
-      ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0)) {
-    // If the loop has an unrolling pragma, we want to be more aggressive with
-    // unrolling limits. Set thresholds to at least the PragmaTheshold value
-    // which is larger than the default limits.
-    if (UP.Threshold != NoThreshold)
-      UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
-    if (UP.PartialThreshold != NoThreshold)
-      UP.PartialThreshold =
-          std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
-  }
-
   return UP;
 }
 
@@ -699,84 +687,77 @@ static bool canUnrollCompletely(Loop *L,
   return false;
 }
 
-static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
-                            ScalarEvolution *SE, const TargetTransformInfo &TTI,
-                            AssumptionCache &AC, bool PreserveLCSSA,
-                            Optional<unsigned> ProvidedCount,
-                            Optional<unsigned> ProvidedThreshold,
-                            Optional<bool> ProvidedAllowPartial,
-                            Optional<bool> ProvidedRuntime) {
-  BasicBlock *Header = L->getHeader();
-  DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
-               << "] Loop %" << Header->getName() << "\n");
+// Returns true if unroll count was set explicitly.
+// Calculates unroll count and writes it to UP.Count.
+static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
+                               DominatorTree &DT, LoopInfo *LI,
+                               ScalarEvolution *SE, unsigned TripCount,
+                               unsigned TripMultiple, unsigned LoopSize,
+                               TargetTransformInfo::UnrollingPreferences &UP) {
+  // BEInsns represents number of instructions optimized when "back edge"
+  // becomes "fall through" in unrolled loop.
+  // For now we count a conditional branch on a backedge and a comparison
+  // feeding it.
+  unsigned BEInsns = 2;
+  // Check for explicit Count.
+  // 1st priority is unroll count set by "unroll-count" option.
+  bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
+  if (UserUnrollCount) {
+    UP.Count = UnrollCount;
+    UP.AllowExpensiveTripCount = true;
+    UP.Force = true;
+    if (UP.AllowRemainder &&
+        (LoopSize - BEInsns) * UP.Count + BEInsns < UP.Threshold)
+      return true;
+  }
 
-  if (HasUnrollDisablePragma(L)) {
-    return false;
+  // 2nd priority is unroll count set by pragma.
+  unsigned PragmaCount = UnrollCountPragmaValue(L);
+  if (PragmaCount > 0) {
+    UP.Count = PragmaCount;
+    UP.Runtime = true;
+    UP.AllowExpensiveTripCount = true;
+    UP.Force = true;
+    if (UP.AllowRemainder &&
+        (LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold)
+      return true;
   }
   bool PragmaFullUnroll = HasUnrollFullPragma(L);
-  bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
-  unsigned PragmaCount = UnrollCountPragmaValue(L);
-  bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0;
-
-  // Find trip count and trip multiple if count is not available
-  unsigned TripCount = 0;
-  unsigned TripMultiple = 1;
-  // If there are multiple exiting blocks but one of them is the latch, use the
-  // latch for the trip count estimation. Otherwise insist on a single exiting
-  // block for the trip count estimation.
-  BasicBlock *ExitingBlock = L->getLoopLatch();
-  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
-    ExitingBlock = L->getExitingBlock();
-  if (ExitingBlock) {
-    TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
-    TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+  if (PragmaFullUnroll && TripCount != 0) {
+    UP.Count = TripCount;
+    if ((LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold)
+      return false;
   }
 
-  TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
-      L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
-      ProvidedRuntime, PragmaCount, PragmaFullUnroll, PragmaEnableUnroll,
-      TripCount);
-
-  unsigned Count = UP.Count;
-  bool CountSetExplicitly = Count != 0;
-  // Use a heuristic count if we didn't set anything explicitly.
-  if (!CountSetExplicitly)
-    Count = TripCount == 0 ? DefaultUnrollRuntimeCount : TripCount;
-  if (TripCount && Count > TripCount)
-    Count = TripCount;
-  Count = std::min(Count, UP.FullUnrollMaxCount);
+  bool PragmaEnableUnroll = HasUnrollEnablePragma(L);
+  bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
+                        PragmaEnableUnroll || UserUnrollCount;
 
-  unsigned NumInlineCandidates;
-  bool NotDuplicatable;
-  bool Convergent;
-  unsigned LoopSize = ApproximateLoopSize(
-      L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC);
-  DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
+  uint64_t UnrolledSize;
+  DebugLoc LoopLoc = L->getStartLoc();
+  Function *F = L->getHeader()->getParent();
+  LLVMContext &Ctx = F->getContext();
 
-  // When computing the unrolled size, note that the conditional branch on the
-  // backedge and the comparison feeding it are not replicated like the rest of
-  // the loop body (which is why 2 is subtracted).
-  uint64_t UnrolledSize = (uint64_t)(LoopSize - 2) * Count + 2;
-  if (NotDuplicatable) {
-    DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable"
-                 << " instructions.\n");
-    return false;
-  }
-  if (NumInlineCandidates != 0) {
-    DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
-    return false;
+  if (ExplicitUnroll && TripCount != 0) {
+    // If the loop has an unrolling pragma, we want to be more aggressive with
+    // unrolling limits. Set thresholds to at least the PragmaThreshold value
+    // which is larger than the default limits.
+    UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
+    UP.PartialThreshold =
+        std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
   }
 
-  // Given Count, TripCount and thresholds determine the type of
-  // unrolling which is to be performed.
-  enum { Full = 0, Partial = 1, Runtime = 2 };
-  int Unrolling;
-  if (TripCount && Count == TripCount) {
-    Unrolling = Partial;
-    // If the loop is really small, we don't need to run an expensive analysis.
+  // 3rd priority is full unroll count.
+  // Full unroll make sense only when TripCount could be staticaly calculated.
+  // Also we need to check if we exceed FullUnrollMaxCount.
+  if (TripCount && TripCount <= UP.FullUnrollMaxCount) {
+    // When computing the unrolled size, note that BEInsns are not replicated
+    // like the rest of the loop body.
+    UnrolledSize = (uint64_t)(LoopSize - BEInsns) * TripCount + BEInsns;
     if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount,
                             UnrolledSize, UnrolledSize)) {
-      Unrolling = Full;
+      UP.Count = TripCount;
+      return ExplicitUnroll;
     } else {
       // The loop isn't that small, but we still can fully unroll it if that
       // helps to remove a significant number of instructions.
@@ -788,147 +769,209 @@ static bool tryToUnrollLoop(Loop *L, Dom
                                 UP.PercentDynamicCostSavedThreshold,
                                 UP.DynamicCostSavingsDiscount,
                                 Cost->UnrolledCost, Cost->RolledDynamicCost)) {
-          Unrolling = Full;
+          UP.Count = TripCount;
+          return ExplicitUnroll;
         }
     }
-  } else if (TripCount && Count < TripCount) {
-    Unrolling = Partial;
-  } else {
-    Unrolling = Runtime;
   }
 
-  // Reduce count based on the type of unrolling and the threshold values.
-  unsigned OriginalCount = Count;
-  bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || UP.Runtime;
-  // Don't unroll a runtime trip count loop with unroll full pragma.
-  if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
-    AllowRuntime = false;
-  }
-  bool DecreasedCountDueToConvergence = false;
-  if (Unrolling == Partial) {
-    bool AllowPartial = PragmaEnableUnroll || UP.Partial;
-    if (!AllowPartial && !CountSetExplicitly) {
+  // 4rd priority is partial unrolling.
+  // Try partial unroll only when TripCount could be staticaly calculated.
+  if (TripCount) {
+    if (UP.Count == 0)
+      UP.Count = TripCount;
+    UP.Partial |= ExplicitUnroll;
+    if (!UP.Partial) {
       DEBUG(dbgs() << "  will not try to unroll partially because "
                    << "-unroll-allow-partial not given\n");
+      UP.Count = 0;
       return false;
     }
-    if (UP.PartialThreshold != NoThreshold && Count > 1) {
+    if (UP.PartialThreshold != NoThreshold) {
       // Reduce unroll count to be modulo of TripCount for partial unrolling.
+      UnrolledSize = (uint64_t)(LoopSize - BEInsns) * UP.Count + BEInsns;
       if (UnrolledSize > UP.PartialThreshold)
-        Count = (std::max(UP.PartialThreshold, 3u) - 2) / (LoopSize - 2);
-      if (Count > UP.MaxCount)
-        Count = UP.MaxCount;
-      while (Count != 0 && TripCount % Count != 0)
-        Count--;
-      if (AllowRuntime && Count <= 1) {
+        UP.Count = (std::max(UP.PartialThreshold, 3u) - BEInsns) /
+                   (LoopSize - BEInsns);
+      if (UP.Count > UP.MaxCount)
+        UP.Count = UP.MaxCount;
+      while (UP.Count != 0 && TripCount % UP.Count != 0)
+        UP.Count--;
+      if (UP.AllowRemainder && UP.Count <= 1) {
         // If there is no Count that is modulo of TripCount, set Count to
         // largest power-of-two factor that satisfies the threshold limit.
         // As we'll create fixup loop, do the type of unrolling only if
-        // runtime unrolling is allowed.
-        Count = DefaultUnrollRuntimeCount;
-        UnrolledSize = (LoopSize - 2) * Count + 2;
-        while (Count != 0 && UnrolledSize > UP.PartialThreshold) {
-          Count >>= 1;
-          UnrolledSize = (LoopSize - 2) * Count + 2;
+        // remainder loop is allowed.
+        UP.Count = DefaultUnrollRuntimeCount;
+        UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
+        while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
+          UP.Count >>= 1;
+          UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
         }
       }
+      if (UP.Count < 2) {
+        if (PragmaEnableUnroll)
+          emitOptimizationRemarkMissed(
+              Ctx, DEBUG_TYPE, *F, LoopLoc,
+              "Unable to unroll loop as directed by unroll(enable) pragma "
+              "because unrolled size is too large.");
+        UP.Count = 0;
+      }
+    } else {
+      UP.Count = TripCount;
     }
-  } else if (Unrolling == Runtime) {
-    if (!AllowRuntime && !CountSetExplicitly) {
-      DEBUG(dbgs() << "  will not try to unroll loop with runtime trip count "
-                   << "-unroll-runtime not given\n");
-      return false;
-    }
-
-    // Reduce unroll count to be the largest power-of-two factor of
-    // the original count which satisfies the threshold limit.
-    while (Count != 0 && UnrolledSize > UP.PartialThreshold) {
-      Count >>= 1;
-      UnrolledSize = (LoopSize - 2) * Count + 2;
-    }
-
-    if (Count > UP.MaxCount)
-      Count = UP.MaxCount;
-
-    // If the loop contains a convergent operation, the prelude we'd add
-    // to do the first few instructions before we hit the unrolled loop
-    // is unsafe -- it adds a control-flow dependency to the convergent
-    // operation.  Therefore Count must divide TripMultiple.
-    //
-    // TODO: This is quite conservative.  In practice, convergent_op()
-    // is likely to be called unconditionally in the loop.  In this
-    // case, the program would be ill-formed (on most architectures)
-    // unless n were the same on all threads in a thread group.
-    // Assuming n is the same on all threads, any kind of unrolling is
-    // safe.  But currently llvm's notion of convergence isn't powerful
-    // enough to express this.
-    unsigned OrigCount = Count;
-    while (Convergent && Count != 0 && TripMultiple % Count != 0) {
-      DecreasedCountDueToConvergence = true;
-      Count >>= 1;
-    }
-    if (OrigCount > Count) {
-      DEBUG(dbgs() << "  loop contains a convergent instruction, so unroll "
-                      "count must divide the trip multiple, "
-                   << TripMultiple << ".  Reducing unroll count from "
-                   << OrigCount << " to " << Count << ".\n");
-    }
-    DEBUG(dbgs() << "  partially unrolling with count: " << Count << "\n");
-  }
-
-  if (HasPragma) {
-    // Emit optimization remarks if we are unable to unroll the loop
-    // as directed by a pragma.
-    DebugLoc LoopLoc = L->getStartLoc();
-    Function *F = Header->getParent();
-    LLVMContext &Ctx = F->getContext();
-    if (PragmaCount > 0 && DecreasedCountDueToConvergence) {
-      emitOptimizationRemarkMissed(
-          Ctx, DEBUG_TYPE, *F, LoopLoc,
-          Twine("Unable to unroll loop the number of times directed by "
-                "unroll_count pragma because the loop contains a convergent "
-                "instruction, and so must have an unroll count that divides "
-                "the loop trip multiple of ") +
-              Twine(TripMultiple) + ".  Unrolling instead " + Twine(Count) +
-              " time(s).");
-    } else if ((PragmaCount > 0) && Count != OriginalCount) {
-      emitOptimizationRemarkMissed(
-          Ctx, DEBUG_TYPE, *F, LoopLoc,
-          "Unable to unroll loop the number of times directed by "
-          "unroll_count pragma because unrolled size is too large.");
-    } else if (PragmaFullUnroll && !TripCount) {
+    if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
+        UP.Count != TripCount)
       emitOptimizationRemarkMissed(
           Ctx, DEBUG_TYPE, *F, LoopLoc,
-          "Unable to fully unroll loop as directed by unroll(full) pragma "
-          "because loop has a runtime trip count.");
-    } else if (PragmaEnableUnroll && Count != TripCount && Count < 2) {
-      emitOptimizationRemarkMissed(
-          Ctx, DEBUG_TYPE, *F, LoopLoc,
-          "Unable to unroll loop as directed by unroll(enable) pragma because "
+          "Unable to fully unroll loop as directed by unroll pragma because "
           "unrolled size is too large.");
-    } else if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
-               Count != TripCount) {
+    return ExplicitUnroll;
+  }
+  assert(TripCount == 0 &&
+         "All cases when TripCount is constant should be covered here.");
+  if (PragmaFullUnroll)
+    emitOptimizationRemarkMissed(
+        Ctx, DEBUG_TYPE, *F, LoopLoc,
+        "Unable to fully unroll loop as directed by unroll(full) pragma "
+        "because loop has a runtime trip count.");
+
+  // 5th priority is runtime unrolling.
+  // Don't unroll a runtime trip count loop when it is disabled.
+  if (HasRuntimeUnrollDisablePragma(L)) {
+    UP.Count = 0;
+    return false;
+  }
+  // Reduce count based on the type of unrolling and the threshold values.
+  UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
+  if (!UP.Runtime) {
+    DEBUG(dbgs() << "  will not try to unroll loop with runtime trip count "
+                 << "-unroll-runtime not given\n");
+    UP.Count = 0;
+    return false;
+  }
+  if (UP.Count == 0)
+    UP.Count = DefaultUnrollRuntimeCount;
+  UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
+
+  // Reduce unroll count to be the largest power-of-two factor of
+  // the original count which satisfies the threshold limit.
+  while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
+    UP.Count >>= 1;
+    UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
+  }
+
+  unsigned OrigCount = UP.Count;
+
+  if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) {
+    while (UP.Count != 0 && TripMultiple % UP.Count != 0)
+      UP.Count >>= 1;
+    DEBUG(dbgs() << "Remainder loop is restricted (that could architecture "
+                    "specific or because the loop contains a convergent "
+                    "instruction), so unroll count must divide the trip "
+                    "multiple, "
+                 << TripMultiple << ".  Reducing unroll count from "
+                 << OrigCount << " to " << UP.Count << ".\n");
+    if (PragmaCount > 0 && !UP.AllowRemainder)
       emitOptimizationRemarkMissed(
           Ctx, DEBUG_TYPE, *F, LoopLoc,
-          "Unable to fully unroll loop as directed by unroll pragma because "
-          "unrolled size is too large.");
-    }
+          Twine("Unable to unroll loop the number of times directed by "
+                "unroll_count pragma because remainder loop is restricted "
+                "(that could architecture specific or because the loop "
+                "contains a convergent instruction) and so must have an unroll "
+                "count that divides the loop trip multiple of ") +
+              Twine(TripMultiple) + ".  Unrolling instead " + Twine(UP.Count) +
+              " time(s).");
+  }
+
+  if (UP.Count > UP.MaxCount)
+    UP.Count = UP.MaxCount;
+  DEBUG(dbgs() << "  partially unrolling with count: " << UP.Count << "\n");
+  if (UP.Count < 2)
+    UP.Count = 0;
+  return ExplicitUnroll;
+}
+
+static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
+                            ScalarEvolution *SE, const TargetTransformInfo &TTI,
+                            AssumptionCache &AC, bool PreserveLCSSA,
+                            Optional<unsigned> ProvidedCount,
+                            Optional<unsigned> ProvidedThreshold,
+                            Optional<bool> ProvidedAllowPartial,
+                            Optional<bool> ProvidedRuntime) {
+  BasicBlock *Header = L->getHeader();
+  DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
+               << "] Loop %" << Header->getName() << "\n");
+  if (HasUnrollDisablePragma(L)) {
+    return false;
   }
 
-  if (Unrolling != Full && Count < 2) {
-    // Partial unrolling by 1 is a nop.  For full unrolling, a factor
-    // of 1 makes sense because loop control can be eliminated.
+  unsigned NumInlineCandidates;
+  bool NotDuplicatable;
+  bool Convergent;
+  unsigned LoopSize = ApproximateLoopSize(
+      L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC);
+  DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
+  if (NotDuplicatable) {
+    DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable"
+                 << " instructions.\n");
     return false;
   }
+  if (NumInlineCandidates != 0) {
+    DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
+    return false;
+  }
+
+  // Find trip count and trip multiple if count is not available
+  unsigned TripCount = 0;
+  unsigned TripMultiple = 1;
+  // If there are multiple exiting blocks but one of them is the latch, use the
+  // latch for the trip count estimation. Otherwise insist on a single exiting
+  // block for the trip count estimation.
+  BasicBlock *ExitingBlock = L->getLoopLatch();
+  if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
+    ExitingBlock = L->getExitingBlock();
+  if (ExitingBlock) {
+    TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+    TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+  }
+
+  TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
+      L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
+      ProvidedRuntime);
+
+  // If the loop contains a convergent operation, the prelude we'd add
+  // to do the first few instructions before we hit the unrolled loop
+  // is unsafe -- it adds a control-flow dependency to the convergent
+  // operation.  Therefore restrict remainder loop (try unrollig without).
+  //
+  // TODO: This is quite conservative.  In practice, convergent_op()
+  // is likely to be called unconditionally in the loop.  In this
+  // case, the program would be ill-formed (on most architectures)
+  // unless n were the same on all threads in a thread group.
+  // Assuming n is the same on all threads, any kind of unrolling is
+  // safe.  But currently llvm's notion of convergence isn't powerful
+  // enough to express this.
+  if (Convergent)
+    UP.AllowRemainder = false;
+
+  bool IsCountSetExplicitly = computeUnrollCount(L, TTI, DT, LI, SE, TripCount,
+                                                 TripMultiple, LoopSize, UP);
+  if (!UP.Count)
+    return false;
+  // Unroll factor (Count) must be less or equal to TripCount.
+  if (TripCount && UP.Count > TripCount)
+    UP.Count = TripCount;
 
   // Unroll the loop.
-  if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount,
-                  TripMultiple, LI, SE, &DT, &AC, PreserveLCSSA))
+  if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
+                  UP.AllowExpensiveTripCount, TripMultiple, LI, SE, &DT, &AC,
+                  PreserveLCSSA))
     return false;
 
-  // If loop has an unroll count pragma mark loop as unrolled to prevent
-  // unrolling beyond that requested by the pragma.
-  if (HasPragma && PragmaCount != 0)
+  // If loop has an unroll count pragma or unrolled by explicitly set count
+  // mark loop as unrolled to prevent unrolling beyond that requested.
+  if (IsCountSetExplicitly)
     SetLoopAlreadyUnrolled(L);
   return true;
 }

Modified: llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp?rev=271071&r1=271070&r2=271071&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp Fri May 27 18:15:06 2016
@@ -199,7 +199,7 @@ static bool needToInsertPhisForLCSSA(Loo
 ///
 /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
 /// DominatorTree if they are non-null.
-bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
+bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
                       bool AllowRuntime, bool AllowExpensiveTripCount,
                       unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
                       DominatorTree *DT, AssumptionCache *AC,
@@ -298,8 +298,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned
   if (RuntimeTripCount && TripMultiple % Count != 0 &&
       !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
                                   UnrollRuntimeEpilog, LI, SE, DT, 
-                                  PreserveLCSSA))
-    return false;
+                                  PreserveLCSSA)) {
+    if (Force)
+      RuntimeTripCount = false;
+    else
+      return false;
+  }
 
   // Notify ScalarEvolution that the loop will be substantially changed,
   // if not outright eliminated.

Modified: llvm/trunk/test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll?rev=271071&r1=271070&r2=271071&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll Fri May 27 18:15:06 2016
@@ -1,4 +1,4 @@
-; RUN: opt < %s -S -unroll-threshold=20 -loop-unroll -unroll-allow-partial -unroll-runtime -unroll-dynamic-cost-savings-discount=0 | FileCheck %s
+; RUN: opt < %s -S -unroll-threshold=20 -loop-unroll -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-dynamic-cost-savings-discount=0 | FileCheck %s
 
 ; The Loop TripCount is 9. However unroll factors 3 or 9 exceed given threshold.
 ; The test checks that we choose a smaller, power-of-two, unroll count and do not give up on unrolling.




More information about the llvm-commits mailing list