[llvm] [LV] Fix MVE regression from #132190 (PR #141736)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 28 03:09:25 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Sam Tebbs (SamTebbs33)
<details>
<summary>Changes</summary>
Register pressure was only considered if the vector bandwidth was being maximised (chosen either by the target or user options), but #132190 inadvertently caused high-pressure VFs to be pruned even when max bandwidth wasn't enabled. This PR returns to the previous behaviour.
---
Full diff: https://github.com/llvm/llvm-project/pull/141736.diff
1 File Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+21-6)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2fe59a464457f..ad3cbc6cd1e42 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -959,6 +959,10 @@ class LoopVectorizationCostModel {
return expectedCost(UserVF).isValid();
}
+ /// \return True if maximizing vector bandwidth is enabled by the target or
+ /// user options.
+ bool useMaxBandwidth(TargetTransformInfo::RegisterKind RegKind);
+
/// \return The size (in bits) of the smallest and widest types in the code
/// that needs to be vectorized. We ignore values that remain scalar such as
/// 64 bit loop indices.
@@ -3944,6 +3948,14 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
return FixedScalableVFPair::getNone();
}
+bool LoopVectorizationCostModel::useMaxBandwidth(
+ TargetTransformInfo::RegisterKind RegKind) {
+ return MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 &&
+ (TTI.shouldMaximizeVectorBandwidth(RegKind) ||
+ (UseWiderVFIfCallVariantsPresent &&
+ Legal->hasVectorCallVariants())));
+}
+
ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
unsigned MaxTripCount, unsigned SmallestType, unsigned WidestType,
ElementCount MaxSafeVF, bool FoldTailByMasking) {
@@ -4009,10 +4021,7 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
ComputeScalableMaxVF ? TargetTransformInfo::RGK_ScalableVector
: TargetTransformInfo::RGK_FixedWidthVector;
ElementCount MaxVF = MaxVectorElementCount;
- if (MaximizeBandwidth ||
- (MaximizeBandwidth.getNumOccurrences() == 0 &&
- (TTI.shouldMaximizeVectorBandwidth(RegKind) ||
- (UseWiderVFIfCallVariantsPresent && Legal->hasVectorCallVariants())))) {
+ if (useMaxBandwidth(RegKind)) {
auto MaxVectorElementCountMaxBW = ElementCount::get(
llvm::bit_floor(WidestRegister.getKnownMinValue() / SmallestType),
ComputeScalableMaxVF);
@@ -4384,7 +4393,10 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
/// Don't consider the VF if it exceeds the number of registers for the
/// target.
- if (RU.exceedsMaxNumRegs(TTI))
+ if (CM.useMaxBandwidth(VF.isScalable()
+ ? TargetTransformInfo::RGK_ScalableVector
+ : TargetTransformInfo::RGK_FixedWidthVector) &&
+ RU.exceedsMaxNumRegs(TTI))
continue;
InstructionCost C = CM.expectedCost(VF);
@@ -7458,7 +7470,10 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
InstructionCost Cost = cost(*P, VF);
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
- if (RU.exceedsMaxNumRegs(TTI)) {
+ if (CM.useMaxBandwidth(VF.isScalable()
+ ? TargetTransformInfo::RGK_ScalableVector
+ : TargetTransformInfo::RGK_FixedWidthVector) &&
+ RU.exceedsMaxNumRegs(TTI)) {
LLVM_DEBUG(dbgs() << "LV(REG): Not considering vector loop of width "
<< VF << " because it uses too many registers\n");
continue;
``````````
</details>
https://github.com/llvm/llvm-project/pull/141736
More information about the llvm-commits mailing list