[llvm] 829b62a - [unroll] Split full exact and full bound unroll costing [NFC]

Mon Nov 29 14:18:56 PST 2021

Author: Philip Reames
Date: 2021-11-29T14:18:15-08:00
New Revision: 829b62adf5db189843b9a9ce626dfef97f76059f

URL: https://github.com/llvm/llvm-project/commit/829b62adf5db189843b9a9ce626dfef97f76059f
DIFF: https://github.com/llvm/llvm-project/commit/829b62adf5db189843b9a9ce626dfef97f76059f.diff

LOG: [unroll] Split full exact and full bound unroll costing [NFC]

This change should be NFC. It's posted for review mostly to make sure others are happy with the names I'm introducing for "exact full unroll" and "bounded full unroll". The motivation here is that our cost model for bounded unrolling is too aggressive - it gives benefits for exits we aren't going to prune - but I also just think the new version of the code is a lot easier to follow.

Differential Revision: https://reviews.llvm.org/D114453

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 3d067d4d78ebe..f8e179e8a8724 100644

--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -806,8 +806,9 @@ static Optional<unsigned> shouldFullUnroll(
     ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
     const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
     const TargetTransformInfo::UnrollingPreferences &UP) {
+  assert(FullUnrollTripCount && "should be non-zero!");
 
-  if (!FullUnrollTripCount || FullUnrollTripCount >= UP.FullUnrollMaxCount)
+  if (FullUnrollTripCount >= UP.FullUnrollMaxCount)
     return None;
 
   // When computing the unrolled size, note that BEInsns are not replicated
@@ -946,11 +947,21 @@ bool llvm::computeUnrollCount(
     }
   }
 
-  // 3rd priority is full unroll count.
-  // Full unroll makes sense only when TripCount or its upper bound could be
-  // statically calculated.
-  // Also we need to check if we exceed FullUnrollMaxCount.
+  // 3rd priority is exact full unrolling.  This will eliminate all copies
+  // of some exit test.
+  UP.Count = 0;
+  if (TripCount) {
+    UP.Count = TripCount;
+    UnrollFactor =
+      shouldFullUnroll(L, TTI, DT, SE, EphValues, TripCount, UCE, UP);
+    if (UnrollFactor) {
+      UP.Count = *UnrollFactor;
+      UseUpperBound = false;
+      return ExplicitUnroll;
+    }
+  }
 
+  // 4th priority is bounded unrolling.
   // We can unroll by the upper bound amount if it's generally allowed or if
   // we know that the loop is executed either the upper bound or zero times.
   // (MaxOrZero unrolling keeps only the first loop test, so the number of
@@ -959,35 +970,22 @@ bool llvm::computeUnrollCount(
   // number of loop tests goes up which may end up being worse on targets with
   // constrained branch predictor resources so is controlled by an option.)
   // In addition we only unroll small upper bounds.
-  unsigned FullUnrollMaxTripCount = MaxTripCount;
-  if (!(UP.UpperBound || MaxOrZero) ||
-      FullUnrollMaxTripCount > UnrollMaxUpperBound)
-    FullUnrollMaxTripCount = 0;
-
-  // UnrollByMaxCount and ExactTripCount cannot both be non zero since we only
-  // compute the former when the latter is zero.
-  unsigned ExactTripCount = TripCount;
-  assert((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) &&
-         "ExtractTripCount and UnrollByMaxCount cannot both be non zero.");
-
-  unsigned FullUnrollTripCount =
-      ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
-  UP.Count = FullUnrollTripCount;
-
-  UnrollFactor =
-      shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP);
-
-  // if shouldFullUnroll can do the unrolling, some side parameteres should be
-  // set
-  if (UnrollFactor) {
-    UP.Count = *UnrollFactor;
-    UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
-    return ExplicitUnroll;
-  } else {
-    UP.Count = FullUnrollTripCount;
+  // Note that the cost of bounded unrolling is always strictly greater than
+  // the cost of exact full unrolling.  As such, if we have an exact count and
+  // found it unprofitable, we'll never choose bounded unrolling.
+  if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&
+      MaxTripCount <= UnrollMaxUpperBound) {
+    UP.Count = MaxTripCount;
+    UnrollFactor =
+      shouldFullUnroll(L, TTI, DT, SE, EphValues, MaxTripCount, UCE, UP);
+    if (UnrollFactor) {
+      UP.Count = *UnrollFactor;
+      UseUpperBound = true;
+      return ExplicitUnroll;
+    }
   }
 
-  // 4th priority is loop peeling.
+  // 5th priority is loop peeling.
   computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UP.Threshold);
   if (PP.PeelCount) {
     UP.Runtime = false;
@@ -1000,7 +998,7 @@ bool llvm::computeUnrollCount(
   if (TripCount)
     UP.Partial |= ExplicitUnroll;
 
-  // 5th priority is partial unrolling.
+  // 6th priority is partial unrolling.
   // Try partial unroll only when TripCount could be statically calculated.
   UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP);
 
@@ -1045,7 +1043,7 @@ bool llvm::computeUnrollCount(
                 "because loop has a runtime trip count.";
     });
 
-  // 6th priority is runtime unrolling.
+  // 7th priority is runtime unrolling.
   // Don't unroll a runtime trip count loop when it is disabled.
   if (hasRuntimeUnrollDisablePragma(L)) {
     UP.Count = 0;