[llvm] SCEV: return std::nullopt for invalid TC (NFC) (PR #94162)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 4 02:53:27 PDT 2024
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/94162
>From 9e2a0326e0219d0ea4def9cea6b503f4251fb1f1 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Sun, 2 Jun 2024 11:17:25 +0100
Subject: [PATCH 1/2] SCEV: avoid conflating unknown TC with wrap (NFC)
ScalarEvolution::getSmallConstantTripCount and
getSmallConstantMaxTripCount have a special zero return value to
indicate that the trip count is unknown or that it wrapped around. This can
cause confusion if callers aren't careful. Change it to never wrap, and
return a std::optional that has a value on valid trip counts. This
patch doesn't show the benefits of the change, and uses value_or(0) to
migrate many callers in what is a non-functional change. Improvements
are planned for future patches.
---
llvm/include/llvm/Analysis/ScalarEvolution.h | 26 ++++++++----------
llvm/lib/Analysis/Loads.cpp | 4 +--
llvm/lib/Analysis/LoopCacheAnalysis.cpp | 7 ++---
llvm/lib/Analysis/ScalarEvolution.cpp | 27 ++++++++++---------
.../Hexagon/HexagonTargetTransformInfo.cpp | 5 ++--
.../Target/PowerPC/PPCTargetTransformInfo.cpp | 2 +-
.../Transforms/Scalar/LoopDataPrefetch.cpp | 3 ++-
llvm/lib/Transforms/Scalar/LoopFuse.cpp | 4 +--
.../Transforms/Scalar/LoopStrengthReduce.cpp | 8 +++---
.../Scalar/LoopUnrollAndJamPass.cpp | 5 ++--
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 7 ++---
llvm/lib/Transforms/Utils/LoopUnroll.cpp | 4 +--
.../Transforms/Vectorize/LoopVectorize.cpp | 27 ++++++++++---------
.../unittests/Analysis/UnrollAnalyzerTest.cpp | 2 +-
14 files changed, 68 insertions(+), 63 deletions(-)
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 72f3d94542496..6b51407f95a9b 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -810,27 +810,23 @@ class ScalarEvolution {
const Loop *L);
/// Returns the exact trip count of the loop if we can compute it, and
- /// the result is a small constant. '0' is used to represent an unknown
- /// or non-constant trip count. Note that a trip count is simply one more
+ /// the result is a small constant. Note that a trip count is simply one more
/// than the backedge taken count for the loop.
- unsigned getSmallConstantTripCount(const Loop *L);
+ std::optional<unsigned> getSmallConstantTripCount(const Loop *L);
/// Return the exact trip count for this loop if we exit through ExitingBlock.
- /// '0' is used to represent an unknown or non-constant trip count. Note
- /// that a trip count is simply one more than the backedge taken count for
- /// the same exit.
- /// This "trip count" assumes that control exits via ExitingBlock. More
- /// precisely, it is the number of times that control will reach ExitingBlock
- /// before taking the branch. For loops with multiple exits, it may not be
- /// the number times that the loop header executes if the loop exits
- /// prematurely via another branch.
- unsigned getSmallConstantTripCount(const Loop *L,
- const BasicBlock *ExitingBlock);
+ /// Note that a trip count is simply one more than the backedge taken count
+ /// for the same exit. This "trip count" assumes that control exits via
+ /// ExitingBlock. More precisely, it is the number of times that control will
+ /// reach ExitingBlock before taking the branch. For loops with multiple
+ /// exits, it may not be the number of times that the loop header executes if the
+ /// loop exits prematurely via another branch.
+ std::optional<unsigned>
+ getSmallConstantTripCount(const Loop *L, const BasicBlock *ExitingBlock);
/// Returns the upper bound of the loop trip count as a normal unsigned
/// value.
- /// Returns 0 if the trip count is unknown or not constant.
- unsigned getSmallConstantMaxTripCount(const Loop *L);
+ std::optional<unsigned> getSmallConstantMaxTripCount(const Loop *L);
/// Returns the largest constant divisor of the trip count as a normal
/// unsigned value, if possible. This means that the actual trip count is
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 478302d687b53..0549216c7a425 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -287,7 +287,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
if (!Step)
return false;
- auto TC = SE.getSmallConstantMaxTripCount(L);
+ std::optional<unsigned> TC = SE.getSmallConstantMaxTripCount(L);
if (!TC)
return false;
@@ -301,7 +301,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
// same.
// For patterns with gaps (i.e. non unit stride), we are
// accessing EltSize bytes at every Step.
- APInt AccessSize = TC * Step->getAPInt();
+ APInt AccessSize = *TC * Step->getAPInt();
assert(SE.isLoopInvariant(AddRec->getStart(), L) &&
"implied by addrec definition");
diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
index 7ca9f15ad5fca..27803365a12f8 100644
--- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
@@ -568,9 +568,10 @@ CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
assert(!Loops.empty() && "Expecting a non-empty loop vector.");
for (const Loop *L : Loops) {
- unsigned TripCount = SE.getSmallConstantTripCount(L);
- TripCount = (TripCount == 0) ? DefaultTripCount : TripCount;
- TripCounts.push_back({L, TripCount});
+ std::optional<unsigned> TripCount = SE.getSmallConstantTripCount(L);
+ if (!TripCount)
+ TripCount = DefaultTripCount;
+ TripCounts.push_back({L, *TripCount});
}
calculateCacheFootprint();
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index e46d7183a2a35..1927ae62cda8e 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6463,7 +6463,7 @@ getRangeForUnknownRecurrence(const SCEVUnknown *U) {
// TODO: Handle the power function forms some day.
return FullSet;
- unsigned TC = getSmallConstantMaxTripCount(L);
+ std::optional<unsigned> TC = getSmallConstantMaxTripCount(L);
if (!TC || TC >= BitWidth)
return FullSet;
@@ -6474,7 +6474,7 @@ getRangeForUnknownRecurrence(const SCEVUnknown *U) {
// Compute total shift amount, being careful of overflow and bitwidths.
auto MaxShiftAmt = KnownStep.getMaxValue();
- APInt TCAP(BitWidth, TC-1);
+ APInt TCAP(BitWidth, *TC - 1);
bool Overflow = false;
auto TotalShift = MaxShiftAmt.umul_ov(TCAP, Overflow);
if (Overflow)
@@ -8174,26 +8174,28 @@ const SCEV *ScalarEvolution::getTripCountFromExitCount(const SCEV *ExitCount,
return getAddExpr(getTruncateOrZeroExtend(ExitCount, EvalTy), getOne(EvalTy));
}
-static unsigned getConstantTripCount(const SCEVConstant *ExitCount) {
+static std::optional<unsigned>
+getConstantTripCount(const SCEVConstant *ExitCount) {
if (!ExitCount)
- return 0;
+ return std::nullopt;
ConstantInt *ExitConst = ExitCount->getValue();
- // Guard against huge trip counts.
- if (ExitConst->getValue().getActiveBits() > 32)
- return 0;
+ // Guaranteed to never overflow.
+ if (std::optional<uint64_t> V = ExitConst->getValue().tryZExtValue())
+ if (V < std::numeric_limits<unsigned>::max())
+ return ((unsigned)*V) + 1;
- // In case of integer overflow, this returns 0, which is correct.
- return ((unsigned)ExitConst->getZExtValue()) + 1;
+ return std::nullopt;
}
-unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) {
+std::optional<unsigned>
+ScalarEvolution::getSmallConstantTripCount(const Loop *L) {
auto *ExitCount = dyn_cast<SCEVConstant>(getBackedgeTakenCount(L, Exact));
return getConstantTripCount(ExitCount);
}
-unsigned
+std::optional<unsigned>
ScalarEvolution::getSmallConstantTripCount(const Loop *L,
const BasicBlock *ExitingBlock) {
assert(ExitingBlock && "Must pass a non-null exiting block!");
@@ -8204,7 +8206,8 @@ ScalarEvolution::getSmallConstantTripCount(const Loop *L,
return getConstantTripCount(ExitCount);
}
-unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
+std::optional<unsigned>
+ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
const auto *MaxExitCount =
dyn_cast<SCEVConstant>(getConstantMaxBackedgeTakenCount(L));
return getConstantTripCount(MaxExitCount);
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index f47fcff5d6025..82e843996ed2d 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -88,9 +88,8 @@ void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
BaseT::getPeelingPreferences(L, SE, PP);
// Only try to peel innermost loops with small runtime trip counts.
- if (L && L->isInnermost() && canPeel(L) &&
- SE.getSmallConstantTripCount(L) == 0 &&
- SE.getSmallConstantMaxTripCount(L) > 0 &&
+ if (L && L->isInnermost() && canPeel(L) && !SE.getSmallConstantTripCount(L) &&
+ SE.getSmallConstantMaxTripCount(L) &&
SE.getSmallConstantMaxTripCount(L) <= 5) {
PP.PeelCount = 2;
}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 3fa35efc2d159..cea12d68a5b12 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -346,7 +346,7 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
SchedModel.init(ST);
// Do not convert small short loops to CTR loop.
- unsigned ConstTripCount = SE.getSmallConstantTripCount(L);
+ unsigned ConstTripCount = SE.getSmallConstantTripCount(L).value_or(0);
if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index cc1f56014eee9..a135ee2515462 100644
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -316,7 +316,8 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
if (ItersAhead > getMaxPrefetchIterationsAhead())
return MadeChange;
- unsigned ConstantMaxTripCount = SE->getSmallConstantMaxTripCount(L);
+ unsigned ConstantMaxTripCount =
+ SE->getSmallConstantMaxTripCount(L).value_or(0);
if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
return MadeChange;
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index e0b224d5ef735..bf861d82925dd 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -754,8 +754,8 @@ struct LoopFuser {
// Currently only considering loops with a single exit point
// and a non-constant trip count.
- const unsigned TC0 = SE.getSmallConstantTripCount(FC0.L);
- const unsigned TC1 = SE.getSmallConstantTripCount(FC1.L);
+ const unsigned TC0 = SE.getSmallConstantTripCount(FC0.L).value_or(0);
+ const unsigned TC1 = SE.getSmallConstantTripCount(FC1.L).value_or(0);
// If any of the tripcounts are zero that means that loop(s) do not have
// a single exit or a constant tripcount.
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 35a17d6060c94..db938b86dc1b7 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3152,7 +3152,7 @@ void LSRInstance::CollectChains() {
void LSRInstance::FinalizeChain(IVChain &Chain) {
assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
-
+
for (const IVInc &Inc : Chain) {
LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
@@ -6352,7 +6352,7 @@ struct SCEVDbgValueBuilder {
if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
Op.appendToVector(DestExpr);
continue;
- }
+ }
DestExpr.push_back(dwarf::DW_OP_LLVM_arg);
// `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV,
@@ -6822,8 +6822,8 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
// the allowed cost with the loops trip count as best we can.
const unsigned ExpansionBudget = [&]() {
unsigned Budget = 2 * SCEVCheapExpansionBudget;
- if (unsigned SmallTC = SE.getSmallConstantMaxTripCount(L))
- return std::min(Budget, SmallTC);
+ if (std::optional<unsigned> SmallTC = SE.getSmallConstantMaxTripCount(L))
+ return std::min(Budget, *SmallTC);
if (std::optional<unsigned> SmallTC = getLoopEstimatedTripCount(L))
return std::min(Budget, *SmallTC);
// Unknown trip count, assume long running by default.
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 7b4c54370e48a..a10ff525317f8 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -363,9 +363,10 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
// Find trip count and trip multiple
BasicBlock *Latch = L->getLoopLatch();
BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
- unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch);
+ unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch).value_or(0);
unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch);
- unsigned InnerTripCount = SE.getSmallConstantTripCount(SubLoop, SubLoopLatch);
+ unsigned InnerTripCount =
+ SE.getSmallConstantTripCount(SubLoop, SubLoopLatch).value_or(0);
// Decide if, and by how much, to unroll
bool IsCountSetExplicitly = computeUnrollAndJamCount(
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 10fc9e9303e89..b3506fc9ca9dd 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1234,9 +1234,10 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
for (BasicBlock *ExitingBlock : ExitingBlocks)
- if (unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlock))
+ if (std::optional<unsigned> TC =
+ SE.getSmallConstantTripCount(L, ExitingBlock))
if (!TripCount || TC < TripCount)
- TripCount = TripMultiple = TC;
+ TripCount = TripMultiple = *TC;
if (!TripCount) {
// If no exact trip count is known, determine the trip multiple of either
@@ -1269,7 +1270,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
unsigned MaxTripCount = 0;
bool MaxOrZero = false;
if (!TripCount) {
- MaxTripCount = SE.getSmallConstantMaxTripCount(L);
+ MaxTripCount = SE.getSmallConstantMaxTripCount(L).value_or(0);
MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
}
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 1216538195fbd..d057b45a5bd1b 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -477,7 +477,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
L->getExitBlocks(ExitBlocks);
std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
- const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
+ const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L).value_or(0);
const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
unsigned EstimatedLoopInvocationWeight = 0;
std::optional<unsigned> OriginalTripCount =
@@ -507,7 +507,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
continue;
ExitInfo &Info = ExitInfos.try_emplace(ExitingBlock).first->second;
- Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+ Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock).value_or(0);
Info.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
if (Info.TripCount != 0) {
Info.BreakoutTrip = Info.TripCount % ULO.Count;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6628f3d53f56a..77bc924e1faa9 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -428,7 +428,7 @@ static unsigned getReciprocalPredBlockProb() { return 2; }
static std::optional<unsigned> getSmallBestKnownTC(ScalarEvolution &SE,
Loop *L) {
// Check if exact trip count is known.
- if (unsigned ExpectedTC = SE.getSmallConstantTripCount(L))
+ if (std::optional<unsigned> ExpectedTC = SE.getSmallConstantTripCount(L))
return ExpectedTC;
// Check if there is an expected trip count available from profile data.
@@ -437,7 +437,7 @@ static std::optional<unsigned> getSmallBestKnownTC(ScalarEvolution &SE,
return *EstimatedTC;
// Check if upper bound estimate is known.
- if (unsigned ExpectedTC = SE.getSmallConstantMaxTripCount(L))
+ if (std::optional<unsigned> ExpectedTC = SE.getSmallConstantMaxTripCount(L))
return ExpectedTC;
return std::nullopt;
@@ -2046,8 +2046,9 @@ class GeneratedRTChecks {
unsigned BestTripCount = 2;
// If exact trip count is known use that.
- if (unsigned SmallTC = SE->getSmallConstantTripCount(OuterLoop))
- BestTripCount = SmallTC;
+ if (std::optional<unsigned> SmallTC =
+ SE->getSmallConstantTripCount(OuterLoop))
+ BestTripCount = *SmallTC;
else if (LoopVectorizeWithBlockFrequency) {
// Else use profile data if available.
if (auto EstimatedTC = getLoopEstimatedTripCount(OuterLoop))
@@ -2382,7 +2383,7 @@ static bool isIndvarOverflowCheckKnownFalse(
// We know the runtime overflow check is known false iff the (max) trip-count
// is known and (max) trip-count + (VF * UF) does not overflow in the type of
// the vector loop induction variable.
- if (unsigned TC =
+ if (std::optional<unsigned> TC =
Cost->PSE.getSE()->getSmallConstantMaxTripCount(Cost->TheLoop)) {
uint64_t MaxVF = VF.getKnownMinValue();
if (VF.isScalable()) {
@@ -2393,7 +2394,7 @@ static bool isIndvarOverflowCheckKnownFalse(
MaxVF *= *MaxVScale;
}
- return (MaxUIntTripCount - TC).ugt(MaxVF * MaxUF);
+ return (MaxUIntTripCount - *TC).ugt(MaxVF * MaxUF);
}
return false;
@@ -4563,8 +4564,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
return FixedScalableVFPair::getNone();
}
- unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
- unsigned MaxTC = PSE.getSE()->getSmallConstantMaxTripCount(TheLoop);
+ unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop).value_or(0);
+ unsigned MaxTC =
+ PSE.getSE()->getSmallConstantMaxTripCount(TheLoop).value_or(0);
LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
if (TC == 1) {
reportVectorizationFailure("Single iteration (non) loop",
@@ -4856,7 +4858,8 @@ bool LoopVectorizationPlanner::isMoreProfitable(
InstructionCost CostA = A.Cost;
InstructionCost CostB = B.Cost;
- unsigned MaxTripCount = PSE.getSE()->getSmallConstantMaxTripCount(OrigLoop);
+ unsigned MaxTripCount =
+ PSE.getSE()->getSmallConstantMaxTripCount(OrigLoop).value_or(0);
// Improve estimate for the vector width if it is scalable.
unsigned EstimatedWidthA = A.Width.getKnownMinValue();
@@ -5388,12 +5391,12 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
}
assert(EstimatedVF >= 1 && "Estimated VF shouldn't be less than 1");
- unsigned KnownTC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
- if (KnownTC > 0) {
+ if (std::optional<unsigned> KnownTC =
+ PSE.getSE()->getSmallConstantTripCount(TheLoop)) {
// At least one iteration must be scalar when this constraint holds. So the
// maximum available iterations for interleaving is one less.
unsigned AvailableTC =
- requiresScalarEpilogue(VF.isVector()) ? KnownTC - 1 : KnownTC;
+ requiresScalarEpilogue(VF.isVector()) ? *KnownTC - 1 : *KnownTC;
// If trip count is known we select between two prospective ICs, where
// 1) the aggressive IC is capped by the trip count divided by VF
diff --git a/llvm/unittests/Analysis/UnrollAnalyzerTest.cpp b/llvm/unittests/Analysis/UnrollAnalyzerTest.cpp
index 721d67f22f2f2..585e8875454c5 100644
--- a/llvm/unittests/Analysis/UnrollAnalyzerTest.cpp
+++ b/llvm/unittests/Analysis/UnrollAnalyzerTest.cpp
@@ -41,7 +41,7 @@ runUnrollAnalyzer(Module &M, StringRef FuncName,
BasicBlock *Exiting = L->getExitingBlock();
SimplifiedValuesVector.clear();
- unsigned TripCount = SE.getSmallConstantTripCount(L, Exiting);
+ unsigned TripCount = SE.getSmallConstantTripCount(L, Exiting).value_or(0);
for (unsigned Iteration = 0; Iteration < TripCount; Iteration++) {
DenseMap<Value *, Value *> SimplifiedValues;
UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
>From ec834d7f97c7318cf37f5b743e5026b7763f533f Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 4 Jun 2024 10:28:51 +0100
Subject: [PATCH 2/2] address review
---
llvm/lib/Analysis/LoopCacheAnalysis.cpp | 7 +++----
llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp | 4 ++--
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 2 +-
3 files changed, 6 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
index 27803365a12f8..2dba1f2abc7df 100644
--- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
@@ -568,10 +568,9 @@ CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
assert(!Loops.empty() && "Expecting a non-empty loop vector.");
for (const Loop *L : Loops) {
- std::optional<unsigned> TripCount = SE.getSmallConstantTripCount(L);
- if (!TripCount)
- TripCount = DefaultTripCount;
- TripCounts.push_back({L, *TripCount});
+ unsigned TripCount =
+ SE.getSmallConstantTripCount(L).value_or(DefaultTripCount);
+ TripCounts.push_back({L, TripCount});
}
calculateCacheFootprint();
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 82e843996ed2d..590a58820b925 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -87,10 +87,10 @@ void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
BaseT::getPeelingPreferences(L, SE, PP);
+ std::optional<unsigned> MaxTripCount = SE.getSmallConstantMaxTripCount(L);
// Only try to peel innermost loops with small runtime trip counts.
if (L && L->isInnermost() && canPeel(L) && !SE.getSmallConstantTripCount(L) &&
- SE.getSmallConstantMaxTripCount(L) &&
- SE.getSmallConstantMaxTripCount(L) <= 5) {
+ MaxTripCount && MaxTripCount <= 5) {
PP.PeelCount = 2;
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index cea12d68a5b12..e4aeef3360b79 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -346,7 +346,7 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
SchedModel.init(ST);
// Do not convert small short loops to CTR loop.
- unsigned ConstTripCount = SE.getSmallConstantTripCount(L).value_or(0);
+ std::optional<unsigned> ConstTripCount = SE.getSmallConstantTripCount(L);
if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
More information about the llvm-commits
mailing list