[llvm] e5263e3 - [LV][NFC] Clean up tail-folding check for early-exit loops (#133931)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 17 19:57:14 PDT 2025
Author: Shih-Po Hung
Date: 2025-04-18T10:57:11+08:00
New Revision: e5263e3ec83cf662f945a067ffc83e2ed76d9aed
URL: https://github.com/llvm/llvm-project/commit/e5263e3ec83cf662f945a067ffc83e2ed76d9aed
DIFF: https://github.com/llvm/llvm-project/commit/e5263e3ec83cf662f945a067ffc83e2ed76d9aed.diff
LOG: [LV][NFC] Clean up tail-folding check for early-exit loops (#133931)
This patch moves the check for a single latch exit from computeMaxVF()
to LoopVectorizationLegality::canFoldTailByMasking(), because the fallback
that computeMaxVF() performed for this check duplicates the logic that
already runs when foldTailByMasking() returns false.
It also updates the NoScalarEpilogueNeeded logic to return false for
loops that are neither single-latch-exit nor early-exit. This avoids
applying tail-folding in unsupported cases and prevents triggering
assertions during analysis.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 3ec6850d6f685..8e09e6f8d4935 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1872,6 +1872,16 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
}
bool LoopVectorizationLegality::canFoldTailByMasking() const {
+ // The only loops we can vectorize without a scalar epilogue, are loops with
+ // a bottom-test and a single exiting block. We'd have to handle the fact
+ // that not every instruction executes on the last iteration. This will
+ // require a lane mask which varies through the vector loop body. (TODO)
+ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Cannot fold tail by masking. Requires a singe latch exit\n");
+ return false;
+ }
LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index db2fd300cb8f5..87fcf7e5d9742 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3968,22 +3968,6 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
break;
}
- // The only loops we can vectorize without a scalar epilogue, are loops with
- // a bottom-test and a single exiting block. We'd have to handle the fact
- // that not every instruction executes on the last iteration. This will
- // require a lane mask which varies through the vector loop body. (TODO)
- if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
- // If there was a tail-folding hint/switch, but we can't fold the tail by
- // masking, fallback to a vectorization with a scalar epilogue.
- if (ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) {
- LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
- "scalar epilogue instead.\n");
- ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
- return computeFeasibleMaxVF(MaxTC, UserVF, false);
- }
- return FixedScalableVFPair::getNone();
- }
-
// Now try the tail folding
// Invalidate interleave groups that require an epilogue if we can't mask
@@ -4013,14 +3997,19 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
}
auto NoScalarEpilogueNeeded = [this, &UserIC](unsigned MaxVF) {
+ // Return false if the loop is neither a single-latch-exit loop nor an
+ // early-exit loop as tail-folding is not supported in that case.
+ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch() &&
+ !Legal->hasUncountableEarlyExit())
+ return false;
unsigned MaxVFtimesIC = UserIC ? MaxVF * UserIC : MaxVF;
ScalarEvolution *SE = PSE.getSE();
- // Currently only loops with countable exits are vectorized, but calling
- // getSymbolicMaxBackedgeTakenCount allows enablement work for loops with
- // uncountable exits whilst also ensuring the symbolic maximum and known
- // back-edge taken count remain identical for loops with countable exits.
+ // Calling getSymbolicMaxBackedgeTakenCount enables support for loops
+ // with uncountable exits. For countable loops, the symbolic maximum must
+ // remain identical to the known back-edge taken count.
const SCEV *BackedgeTakenCount = PSE.getSymbolicMaxBackedgeTakenCount();
- assert(BackedgeTakenCount == PSE.getBackedgeTakenCount() &&
+ assert((Legal->hasUncountableEarlyExit() ||
+ BackedgeTakenCount == PSE.getBackedgeTakenCount()) &&
"Invalid loop count");
const SCEV *ExitCount = SE->getAddExpr(
BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType()));
More information about the llvm-commits mailing list