[llvm] cc7bcef - Reapply: [NFC] factor out unrolling decision logic
Mircea Trofin via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 18 12:05:34 PDT 2021
Author: Ali Sedaghati
Date: 2021-08-18T12:04:33-07:00
New Revision: cc7bcef3e3582196bba9d7d88efbd42a110b39f4
URL: https://github.com/llvm/llvm-project/commit/cc7bcef3e3582196bba9d7d88efbd42a110b39f4
DIFF: https://github.com/llvm/llvm-project/commit/cc7bcef3e3582196bba9d7d88efbd42a110b39f4.diff
LOG: Reapply: [NFC] factor out unrolling decision logic
reverting ffd8a268bdc518f87e9ba7524aba0458f4b9979c (reapplying
4d559837e887c278d7c27274f4f6b1b78b97c00d) - removed spurious inclusion
of <optional>
Differential Revision: https://reviews.llvm.org/D106001
Added:
Modified:
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 34dad2ac213b..a3aeb670ec14 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -319,6 +319,16 @@ struct EstimatedUnrollCost {
unsigned RolledDynamicCost;
};
+struct PragmaInfo {
+ PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
+ : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
+ PragmaEnableUnroll(PEU) {}
+ const bool UserUnrollCount;
+ const bool PragmaFullUnroll;
+ const unsigned PragmaCount;
+ const bool PragmaEnableUnroll;
+};
+
} // end anonymous namespace
/// Figure out if the loop is worth full unrolling.
@@ -747,13 +757,132 @@ class UnrollCostEstimator {
// Returns loop size estimation for unrolled loop, given the unrolling
// configuration specified by UP.
- uint64_t getUnrolledLoopSize(TargetTransformInfo::UnrollingPreferences &UP) {
+ uint64_t
+ getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP,
+ const unsigned CountOverwrite = 0) const {
assert(LoopSize >= UP.BEInsns &&
"LoopSize should not be less than BEInsns!");
- return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
+ if (CountOverwrite)
+ return static_cast<uint64_t>(LoopSize - UP.BEInsns) * CountOverwrite +
+ UP.BEInsns;
+ else
+ return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count +
+ UP.BEInsns;
}
};
+static Optional<unsigned>
+shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
+ const unsigned TripMultiple, const unsigned TripCount,
+ const UnrollCostEstimator UCE,
+ const TargetTransformInfo::UnrollingPreferences &UP) {
+
+ // Using unroll pragma
+ // 1st priority is unroll count set by "unroll-count" option.
+
+ if (PInfo.UserUnrollCount) {
+ if (UP.AllowRemainder &&
+ UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold)
+ return (unsigned)UnrollCount;
+ }
+
+ // 2nd priority is unroll count set by pragma.
+ if (PInfo.PragmaCount > 0) {
+ if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)) &&
+ UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold)
+ return PInfo.PragmaCount;
+ }
+
+ if (PInfo.PragmaFullUnroll && TripCount != 0) {
+ if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold)
+ return TripCount;
+ }
+ // if didn't return until here, should continue to other priorties
+ return None;
+}
+
+static Optional<unsigned> shouldFullUnroll(
+ Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
+ ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
+ const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
+ const TargetTransformInfo::UnrollingPreferences &UP) {
+
+ if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
+ // When computing the unrolled size, note that BEInsns are not replicated
+ // like the rest of the loop body.
+ if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
+ return FullUnrollTripCount;
+
+ } else {
+ // The loop isn't that small, but we still can fully unroll it if that
+ // helps to remove a significant number of instructions.
+ // To check that, run additional analysis on the loop.
+ if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
+ L, FullUnrollTripCount, DT, SE, EphValues, TTI,
+ UP.Threshold * UP.MaxPercentThresholdBoost / 100,
+ UP.MaxIterationsCountToAnalyze)) {
+ unsigned Boost =
+ getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
+ if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
+ return FullUnrollTripCount;
+ }
+ }
+ }
+ }
+ return None;
+}
+
+static Optional<unsigned>
+shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
+ const UnrollCostEstimator UCE,
+ const TargetTransformInfo::UnrollingPreferences &UP) {
+
+ unsigned count = UP.Count;
+ if (TripCount) {
+ if (!UP.Partial) {
+ LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n");
+ count = 0;
+ return count;
+ }
+ if (count == 0)
+ count = TripCount;
+ if (UP.PartialThreshold != NoThreshold) {
+ // Reduce unroll count to be modulo of TripCount for partial unrolling.
+ if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
+ count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
+ (LoopSize - UP.BEInsns);
+ if (count > UP.MaxCount)
+ count = UP.MaxCount;
+ while (count != 0 && TripCount % count != 0)
+ count--;
+ if (UP.AllowRemainder && count <= 1) {
+ // If there is no Count that is modulo of TripCount, set Count to
+ // largest power-of-two factor that satisfies the threshold limit.
+ // As we'll create fixup loop, do the type of unrolling only if
+ // remainder loop is allowed.
+ count = UP.DefaultUnrollRuntimeCount;
+ while (count != 0 &&
+ UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
+ count >>= 1;
+ }
+ if (count < 2) {
+ count = 0;
+ }
+ } else {
+ count = TripCount;
+ }
+ if (count > UP.MaxCount)
+ count = UP.MaxCount;
+
+ LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
+
+ return count;
+ }
+
+ // if didn't return until here, should continue to other priorties
+ return None;
+}
// Returns true if unroll count was set explicitly.
// Calculates unroll count and writes it to UP.Count.
// Unless IgnoreUser is true, will also use metadata and command-line options
@@ -771,7 +900,18 @@ bool llvm::computeUnrollCount(
TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
UnrollCostEstimator UCE(*L, LoopSize);
+ Optional<unsigned> UnrollFactor;
+
+ const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
+ const bool PragmaFullUnroll = hasUnrollFullPragma(L);
+ const unsigned PragmaCount = unrollCountPragmaValue(L);
+ const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
+
+ const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
+ PragmaEnableUnroll || UserUnrollCount;
+ PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
+ PragmaEnableUnroll);
// Use an explicit peel count that has been specified for testing. In this
// case it's not permitted to also specify an explicit unroll count.
if (PP.PeelCount) {
@@ -783,47 +923,29 @@ bool llvm::computeUnrollCount(
UP.Runtime = false;
return true;
}
-
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
- bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
- if (UserUnrollCount) {
- UP.Count = UnrollCount;
- UP.AllowExpensiveTripCount = true;
- UP.Force = true;
- if (UP.AllowRemainder && UCE.getUnrolledLoopSize(UP) < UP.Threshold)
- return true;
- }
-
// 2nd priority is unroll count set by pragma.
- unsigned PragmaCount = unrollCountPragmaValue(L);
- if (PragmaCount > 0) {
- UP.Count = PragmaCount;
- UP.Runtime = true;
- UP.AllowExpensiveTripCount = true;
- UP.Force = true;
- if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
- UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
- return true;
- }
- bool PragmaFullUnroll = hasUnrollFullPragma(L);
- if (PragmaFullUnroll && TripCount != 0) {
- UP.Count = TripCount;
- if (UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
- return false;
- }
+ UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP);
- bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
- bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
- PragmaEnableUnroll || UserUnrollCount;
-
- if (ExplicitUnroll && TripCount != 0) {
- // If the loop has an unrolling pragma, we want to be more aggressive with
- // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
- // value which is larger than the default limits.
- UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
- UP.PartialThreshold =
- std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
+ if (UnrollFactor) {
+ UP.Count = *UnrollFactor;
+
+ if (UserUnrollCount || (PragmaCount > 0)) {
+ UP.AllowExpensiveTripCount = true;
+ UP.Force = true;
+ }
+ UP.Runtime |= (PragmaCount > 0);
+ return ExplicitUnroll;
+ } else {
+ if (ExplicitUnroll && TripCount != 0) {
+ // If the loop has an unrolling pragma, we want to be more aggressive with
+ // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
+ // value which is larger than the default limits.
+ UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
+ UP.PartialThreshold =
+ std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
+ }
}
// 3rd priority is full unroll count.
@@ -853,28 +975,20 @@ bool llvm::computeUnrollCount(
unsigned FullUnrollTripCount =
ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
UP.Count = FullUnrollTripCount;
- if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
- // When computing the unrolled size, note that BEInsns are not replicated
- // like the rest of the loop body.
- if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
- UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
- return ExplicitUnroll;
- } else {
- // The loop isn't that small, but we still can fully unroll it if that
- // helps to remove a significant number of instructions.
- // To check that, run additional analysis on the loop.
- if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
- L, FullUnrollTripCount, DT, SE, EphValues, TTI,
- UP.Threshold * UP.MaxPercentThresholdBoost / 100,
- UP.MaxIterationsCountToAnalyze)) {
- unsigned Boost =
- getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
- if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
- UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
- return ExplicitUnroll;
- }
- }
- }
+
+ UnrollFactor =
+ shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP);
+
+ // if shouldFullUnroll can do the unrolling, some side parameteres should be
+ // set
+ if (UnrollFactor) {
+ UP.Count = *UnrollFactor;
+ UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
+ TripCount = FullUnrollTripCount;
+ TripMultiple = UP.UpperBound ? 1 : TripMultiple;
+ return ExplicitUnroll;
+ } else {
+ UP.Count = FullUnrollTripCount;
}
// 4th priority is loop peeling.
@@ -885,39 +999,31 @@ bool llvm::computeUnrollCount(
return ExplicitUnroll;
}
+ // Before starting partial unrolling, set up.partial to true,
+ // if user explicitly asked for unrolling
+ if (TripCount)
+ UP.Partial |= ExplicitUnroll;
+
// 5th priority is partial unrolling.
// Try partial unroll only when TripCount could be statically calculated.
- if (TripCount) {
- UP.Partial |= ExplicitUnroll;
- if (!UP.Partial) {
- LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
- << "-unroll-allow-partial not given\n");
- UP.Count = 0;
- return false;
- }
- if (UP.Count == 0)
- UP.Count = TripCount;
+ UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP);
+
+ if (UnrollFactor) {
+ UP.Count = *UnrollFactor;
+
+ if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
+ UP.Count != TripCount)
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "FullUnrollAsDirectedTooLarge",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to fully unroll loop as directed by unroll pragma "
+ "because "
+ "unrolled size is too large.";
+ });
+
if (UP.PartialThreshold != NoThreshold) {
- // Reduce unroll count to be modulo of TripCount for partial unrolling.
- if (UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
- UP.Count =
- (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
- (LoopSize - UP.BEInsns);
- if (UP.Count > UP.MaxCount)
- UP.Count = UP.MaxCount;
- while (UP.Count != 0 && TripCount % UP.Count != 0)
- UP.Count--;
- if (UP.AllowRemainder && UP.Count <= 1) {
- // If there is no Count that is modulo of TripCount, set Count to
- // largest power-of-two factor that satisfies the threshold limit.
- // As we'll create fixup loop, do the type of unrolling only if
- // remainder loop is allowed.
- UP.Count = UP.DefaultUnrollRuntimeCount;
- while (UP.Count != 0 &&
- UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
- UP.Count >>= 1;
- }
- if (UP.Count < 2) {
+ if (UP.Count == 0) {
if (PragmaEnableUnroll)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE,
@@ -927,25 +1033,8 @@ bool llvm::computeUnrollCount(
"pragma "
"because unrolled size is too large.";
});
- UP.Count = 0;
}
- } else {
- UP.Count = TripCount;
}
- if (UP.Count > UP.MaxCount)
- UP.Count = UP.MaxCount;
- if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
- UP.Count != TripCount)
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE,
- "FullUnrollAsDirectedTooLarge",
- L->getStartLoc(), L->getHeader())
- << "Unable to fully unroll loop as directed by unroll pragma "
- "because "
- "unrolled size is too large.";
- });
- LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
- << "\n");
return ExplicitUnroll;
}
assert(TripCount == 0 &&
@@ -982,8 +1071,6 @@ bool llvm::computeUnrollCount(
UP.AllowExpensiveTripCount = true;
}
}
-
- // Reduce count based on the type of unrolling and the threshold values.
UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
if (!UP.Runtime) {
LLVM_DEBUG(
@@ -1018,7 +1105,7 @@ bool llvm::computeUnrollCount(
using namespace ore;
- if (PragmaCount > 0 && !UP.AllowRemainder)
+ if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE,
"DifferentUnrollCountFromDirected",
More information about the llvm-commits
mailing list