[llvm] [LV][NFC] Clean up tail-folding check for early-exit loops (PR #133931)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 1 18:40:21 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Shih-Po Hung (arcbbb)
<details>
<summary>Changes</summary>
This patch moves the single-latch-exit check from computeMaxVF() into LoopVectorizationLegality::canFoldTailByMasking(), because the handling in computeMaxVF() duplicated the logic that already runs when foldTailByMasking() returns false.
It also introduces HasSingleLatchExit to prevent early-exit loops from entering code paths that assume non-predicated loops.
---
Full diff: https://github.com/llvm/llvm-project/pull/133931.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp (+10)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+5-18)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 3ec6850d6f685..0763a255b3afa 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1924,6 +1924,16 @@ bool LoopVectorizationLegality::canFoldTailByMasking() const {
}
}
+ // The only loops we can vectorize without a scalar epilogue, are loops with
+ // a bottom-test and a single exiting block. We'd have to handle the fact
+ // that not every instruction executes on the last iteration. This will
+ // require a lane mask which varies through the vector loop body. (TODO)
+ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Cannot fold tail by masking. Requires a singe latch exit\n");
+ return false;
+ }
LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
return true;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 55cc801e91452..a010f5c52e9a7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3987,22 +3987,6 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
break;
}
- // The only loops we can vectorize without a scalar epilogue, are loops with
- // a bottom-test and a single exiting block. We'd have to handle the fact
- // that not every instruction executes on the last iteration. This will
- // require a lane mask which varies through the vector loop body. (TODO)
- if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
- // If there was a tail-folding hint/switch, but we can't fold the tail by
- // masking, fallback to a vectorization with a scalar epilogue.
- if (ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) {
- LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
- "scalar epilogue instead.\n");
- ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
- return computeFeasibleMaxVF(MaxTC, UserVF, false);
- }
- return FixedScalableVFPair::getNone();
- }
-
// Now try the tail folding
// Invalidate interleave groups that require an epilogue if we can't mask
@@ -4049,7 +4033,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
return Rem->isZero();
};
- if (MaxPowerOf2RuntimeVF > 0u) {
+ bool HasSingleLatchExit =
+ TheLoop->getExitingBlock() == TheLoop->getLoopLatch();
+ if (HasSingleLatchExit && MaxPowerOf2RuntimeVF > 0u) {
assert((UserVF.isNonZero() || isPowerOf2_32(*MaxPowerOf2RuntimeVF)) &&
"MaxFixedVF must be a power of 2");
if (NoScalarEpilogueNeeded(*MaxPowerOf2RuntimeVF)) {
@@ -4060,7 +4046,8 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
}
auto ExpectedTC = getSmallBestKnownTC(PSE, TheLoop);
- if (ExpectedTC && ExpectedTC <= TTI.getMinTripCountTailFoldingThreshold()) {
+ if (HasSingleLatchExit && ExpectedTC &&
+ ExpectedTC <= TTI.getMinTripCountTailFoldingThreshold()) {
if (MaxPowerOf2RuntimeVF > 0u) {
// If we have a low-trip-count, and the fixed-width VF is known to divide
// the trip count but the scalable factor does not, use the fixed-width
``````````
</details>
https://github.com/llvm/llvm-project/pull/133931
More information about the llvm-commits
mailing list