[llvm] [SLP][NFC] Refactor to prepare for constant stride stores (PR #185997)
Ryan Buchner via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 17 16:59:44 PDT 2026
https://github.com/bababuck updated https://github.com/llvm/llvm-project/pull/185997
>From 6b21baf3fcea35a1e7fe817655d1cc58743a2a1b Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 9 Mar 2026 23:18:46 -0700
Subject: [PATCH 01/16] [SLP][NFC] Fix typo
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 97ba8eee6742c..5a7be54f122bb 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -21148,7 +21148,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *StrideVal;
const StridedPtrInfo &SPtrInfo = TreeEntryToStridedPtrInfoMap.at(E);
StridedLoadTy = SPtrInfo.Ty;
- assert(StridedLoadTy && "Missing StridedPoinerInfo for tree entry.");
+ assert(StridedLoadTy && "Missing StridedPointerInfo for tree entry.");
unsigned StridedLoadEC =
StridedLoadTy->getElementCount().getKnownMinValue();
>From 688f4b23822ba2c17bd6fbd2c2525f2b84e3de2a Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 10 Mar 2026 15:55:31 -0700
Subject: [PATCH 02/16] [SLP][NFC] Fix off by one error in MaxTotalNum
calculation
Potentially a slight performance improvement.
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5a7be54f122bb..0cab5d5c615eb 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24943,10 +24943,8 @@ bool SLPVectorizerPass::vectorizeStores(
const unsigned MaxTotalNum = std::min<unsigned>(
Operands.size(),
static_cast<unsigned>(
- End -
- std::distance(RangeSizes.begin(),
- find_if(RangeSizes, IsNotVectorized)) +
- 1));
+ End - std::distance(RangeSizes.begin(),
+ find_if(RangeSizes, IsNotVectorized))));
unsigned VF = bit_ceil(CandidateVFs.front()) * 2;
if (VF > MaxTotalNum || VF >= StoresLimit)
break;
>From 0a2c7e7af0a21be0071a69140ac13720a056c15e Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Fri, 27 Feb 2026 12:20:39 -0800
Subject: [PATCH 03/16] [SLP][NFC] Refactor vectorizeStores() to create all
chains first, then vectorize by VF
Try vectorizing all chains with VF=MaxVF, then VF=MaxVF/2, etc. This will be
important once strided stores are supported, since chains will then overlap.
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 549 ++++++++++++------
1 file changed, 361 insertions(+), 188 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0cab5d5c615eb..336e1bdebf76f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24570,13 +24570,281 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
return false;
}
+namespace {
+/// A group of related stores which we are in the process of vectorizing,
+/// a subset of which may already vectorized. Stores context information
+/// about the group as a whole as well as information about what VF's need
+/// to be attempted still.
+struct StoreChainContext {
+ using SizePair = std::pair<unsigned, unsigned>;
+ using SizePairArrayRef = MutableArrayRef<SizePair>;
+
+ const TargetTransformInfo &TTI;
+
+ /// For the StoreTy/Stride in the given group, what is the smallest VF
+ /// that can be used
+ unsigned MinVF = 0;
+ /// Maximum number of instructions that can be vectorized, either
+ /// constrained by register width or operands size.
+ unsigned MaxVF = 0;
+ /// The largest VF checked in the current Repeat
+ unsigned ProbeVF = 0;
+ /// MaxRegVF represents the number of instructions (scalar, or vector in
+ /// case of revec) that can be vectorized to naturally fit in a vector
+ /// register.
+ unsigned MaxRegVF = 0;
+ /// How many times has CandidateVFs been refilled, prevents excessive
+ /// attempts at vectorizing large VFs
+ unsigned Repeat = 1;
+ /// Did any vectorization occur for the current iteration over CandidateVFs
+ bool RepeatChanged = false;
+ /// Are we finished checking this StoreChainContext? Can be due to all VFs
+ /// being checked, or an early exit condition
+ bool Done = false;
+ /// What element index is the end of the to be vectorized Operands
+ /// i.e. Operands.size() == 16, and 12-15 were vectorized, then End == 12
+ unsigned End = 0;
+
+ /// Stores that compose this chain
+ BoUpSLP::ValueList Operands;
+ /// Which VFs do we want to attempt for this chain
+ std::queue<unsigned> CandidateVFs;
+ /// Track the TreeSizes of prior vectorization attempts using each element,
+ /// to help us find early exit cases
+ /// .first contains pointer into RangeSizesByIdx to help us track
+ /// vectorization of elements that belong to multiple chains
+ SmallVector<SizePair> RangeSizesStorage;
+ SizePairArrayRef RangeSizes;
+ /// Store information about failed vectorization attempts due to scheduling
+ DenseMap<Value *, SizePair> NonSchedulable;
+ /// Type of the Stores in `Operands`
+ Type *StoreTy = nullptr;
+
+ /// RangeSize information for all elements in any chain
+ /// Needed since may be overlap between chains
+ inline static SmallVector<unsigned> RangeSizesByIdx;
+ /// Element has not been vectorized, but due to the elements around it being
+ /// vectorized, it does not have enough neighboring elements to make a chain
+ /// longer than MinVF as part of the current Context
+ inline static const unsigned LocallyUnvectorizable =
+ std::numeric_limits<unsigned>::max();
+
+ explicit StoreChainContext(const TargetTransformInfo &TTI,
+ ArrayRef<Value *> Ops,
+ ArrayRef<SizePair> RangeSizes)
+ : TTI(TTI), Operands(Ops), RangeSizesStorage(RangeSizes) {}
+
+ static bool isNotVectorized(const SizePair &P) {
+ return P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] > 0;
+ }
+
+ static bool isVectorized(const SizePair &P) {
+ return P.first == LocallyUnvectorizable || RangeSizesByIdx[P.first] == 0;
+ }
+
+ static bool vfIsProfitable(unsigned Size, const SizePair &P) {
+ assert(P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] &&
+ "Cannot check profitability of vectorized element");
+ return Size >= RangeSizesByIdx[P.first];
+ }
+
+ static bool firstSizeSame(unsigned Size, const SizePair &P) {
+ assert(P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] &&
+ "Cannot check profitability of vectorized element");
+ return Size == RangeSizesByIdx[P.first];
+ }
+
+ // Return the index of the first unvectorized store after \p StartIdx
+ unsigned getFirstUnvecStore(unsigned StartIdx = 0) const {
+ return std::distance(
+ RangeSizes.begin(),
+ find_if(RangeSizes.drop_front(StartIdx), isNotVectorized));
+ }
+
+ // Return the index of the first vectorized store after \p StartIdx
+ unsigned getFirstVecStoreAfter(unsigned StartIdx) const {
+ return std::distance(
+ RangeSizes.begin(),
+ find_if(RangeSizes.drop_front(StartIdx), isVectorized));
+ }
+
+ // Return true if all stores have been vectorized
+ bool allVectorized() const { return all_of(RangeSizes, isVectorized); }
+
+ // Return true if all elements in the given range match \p TreeSize
+ bool isFirstSizeSameRange(unsigned StartIdx, unsigned Length,
+ unsigned TreeSize) const {
+ return all_of(
+ RangeSizes.slice(StartIdx, Length),
+ [TreeSize](const SizePair &P) { return firstSizeSame(TreeSize, P); });
+ }
+
+ // Return true if the \p TreeSize is profitable for all elements in the range
+ bool allOfRangeProfitable(unsigned StartIdx, unsigned Length,
+ unsigned TreeSize) const {
+ return all_of(
+ RangeSizes.slice(StartIdx, Length),
+ [TreeSize](const SizePair &P) { return vfIsProfitable(TreeSize, P); });
+ }
+
+ // Update the live (first) range sizes from the cached values (second)
+ void updateRangeSizesFromCache() {
+ for (SizePair &P : RangeSizes) {
+ if (P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] != 0)
+ RangeSizesByIdx[P.first] = std::max(P.second, RangeSizesByIdx[P.first]);
+ }
+ }
+
+ // Update the cached (second) range sizes with the given \p TreeSize
+ void updateCachedRangeSizes(unsigned StartIdx, unsigned Length,
+ unsigned TreeSize) {
+ for (SizePair &P : RangeSizes.slice(StartIdx, Length))
+ P.second = std::max(P.second, TreeSize);
+ }
+
+ // Update CandidateVFs for secondary iterations
+ bool updateCandidateVFs() {
+ assert(CandidateVFs.empty() && "Did not use all VFs before refilling");
+ constexpr unsigned StoresLimit = 64;
+ const unsigned MaxTotalNum = std::min<unsigned>(
+ Operands.size(), static_cast<unsigned>(End - getFirstUnvecStore()));
+ unsigned VF = bit_ceil(ProbeVF) * 2;
+ if (VF > MaxTotalNum || VF >= StoresLimit)
+ return false;
+ // Attempt again to vectorize even larger chains if all previous
+ // attempts were unsuccessful because of the cost issues.
+ unsigned Limit =
+ getFloorFullVectorNumberOfElements(TTI, StoreTy, MaxTotalNum);
+ if (bit_floor(Limit) == VF && Limit != VF)
+ CandidateVFs.push(Limit);
+ CandidateVFs.push(VF);
+ ProbeVF = CandidateVFs.front();
+ ++Repeat;
+ RepeatChanged = false;
+ return true;
+ }
+
+ // Get the current VF
+ std::optional<unsigned> getCurrentVF() const {
+ if (Done || CandidateVFs.empty())
+ return std::nullopt;
+ return CandidateVFs.front();
+ }
+
+ // Increment the VF-Index counter, return false if at end of CandidateVFs
+ void incrementVF() { CandidateVFs.pop(); }
+
+ // Set up initial values using the already set Operands
+ bool initializeContext(BoUpSLP &R, const DataLayout &DL);
+
+ // Record vectorization of the provided range
+ void markRangeVectorized(unsigned StartIdx, unsigned Length,
+ unsigned &FirstUnvecStore, unsigned &MaxSliceEnd);
+};
+
+void StoreChainContext::markRangeVectorized(unsigned StartIdx, unsigned Length,
+ unsigned &FirstUnvecStore,
+ unsigned &MaxSliceEnd) {
+ for (StoreChainContext::SizePair &P : RangeSizes.slice(StartIdx, Length))
+ RangeSizesByIdx[P.first] = P.second = 0;
+ if (StartIdx < FirstUnvecStore + MinVF) {
+ for (StoreChainContext::SizePair &P :
+ RangeSizes.slice(FirstUnvecStore, StartIdx - FirstUnvecStore)) {
+ P.first = LocallyUnvectorizable;
+ P.second = 0;
+ }
+ FirstUnvecStore = StartIdx + Length;
+ }
+ if (StartIdx + Length > MaxSliceEnd - MinVF) {
+ for (StoreChainContext::SizePair &P : RangeSizes.slice(
+ StartIdx + Length, MaxSliceEnd - (StartIdx + Length))) {
+ P.first = LocallyUnvectorizable;
+ P.second = 0;
+ }
+ if (MaxSliceEnd == End)
+ End = StartIdx;
+ MaxSliceEnd = StartIdx;
+ }
+}
+
+bool StoreChainContext::initializeContext(BoUpSLP &R, const DataLayout &DL) {
+ // Initialize range tracking in context.
+ RangeSizes = MutableArrayRef(RangeSizesStorage);
+
+ unsigned MaxVecRegSize = R.getMaxVecRegSize();
+ unsigned EltSize = R.getVectorElementSize(Operands[0]);
+ unsigned MaxElts = llvm::bit_floor(MaxVecRegSize / EltSize);
+
+ MaxVF = std::min(R.getMaximumVF(EltSize, Instruction::Store), MaxElts);
+ auto *Store = cast<StoreInst>(Operands[0]);
+ StoreTy = Store->getValueOperand()->getType();
+ Type *ValueTy = StoreTy;
+ if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
+ ValueTy = Trunc->getSrcTy();
+ // When REVEC is enabled, StoreTy and ValueTy may be FixedVectorType. But
+ // getStoreMinimumVF only support scalar type as arguments. As a result,
+ // we need to use the element type of StoreTy and ValueTy to retrieve the
+ // VF and then transform it back.
+ // Remember: VF is defined as the number we want to vectorize, not the
+ // number of elements in the final vector.
+ Type *StoreScalarTy = StoreTy->getScalarType();
+ MinVF = PowerOf2Ceil(TTI.getStoreMinimumVF(
+ R.getMinVF(DL.getTypeStoreSizeInBits(StoreScalarTy)), StoreScalarTy,
+ ValueTy->getScalarType()));
+ MinVF /= getNumElements(StoreTy);
+ MinVF = std::max<unsigned>(2, MinVF);
+
+ if (MaxVF < MinVF) {
+ LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
+ << ") < "
+ << "MinVF (" << MinVF << ")\n");
+ Done = true;
+ return false;
+ }
+
+ unsigned NonPowerOf2VF = 0;
+ if (VectorizeNonPowerOf2) {
+ // First try vectorizing with a non-power-of-2 VF. At the moment, only
+ // consider cases where VF + 1 is a power-of-2, i.e. almost all vector
+ // lanes are used.
+ unsigned CandVF = std::clamp<unsigned>(Operands.size(), MinVF, MaxVF);
+ if (has_single_bit(CandVF + 1)) {
+ NonPowerOf2VF = CandVF;
+ assert(NonPowerOf2VF != MaxVF &&
+ "Non-power-of-2 VF should not be equal to MaxVF");
+ }
+ }
+
+ MaxRegVF = MaxVF;
+
+ MaxVF = std::min<unsigned>(MaxVF, bit_floor(Operands.size()));
+ if (MaxVF < MinVF) {
+ LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
+ << ") < "
+ << "MinVF (" << MinVF << ")\n");
+ Done = true;
+ return false;
+ }
+
+ for (unsigned VF = std::max(MaxVF, NonPowerOf2VF); VF >= MinVF;
+ VF = divideCeil(VF, 2))
+ CandidateVFs.push(VF);
+
+ End = Operands.size();
+ ProbeVF = MaxVF;
+ return true;
+}
+} // namespace
+
/// Checks if the quadratic mean deviation is less than 90% of the mean size.
static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes) {
unsigned Num = 0;
uint64_t Sum = std::accumulate(
Sizes.begin(), Sizes.end(), static_cast<uint64_t>(0),
[&](uint64_t V, const std::pair<unsigned, unsigned> &Val) {
- unsigned Size = Val.first;
+ unsigned Size = Val.first == StoreChainContext::LocallyUnvectorizable
+ ? 0
+ : StoreChainContext::RangeSizesByIdx[Val.first];
if (Size == 1)
return V;
++Num;
@@ -24590,7 +24858,10 @@ static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes) {
uint64_t Dev = std::accumulate(
Sizes.begin(), Sizes.end(), static_cast<uint64_t>(0),
[&](uint64_t V, const std::pair<unsigned, unsigned> &Val) {
- unsigned P = Val.first;
+ unsigned P =
+ Val.first == StoreChainContext::LocallyUnvectorizable
+ ? 0
+ : StoreChainContext::RangeSizesByIdx[Val.first];
if (P == 1)
return V;
return V + (P - Mean) * (P - Mean);
@@ -24695,144 +24966,73 @@ bool SLPVectorizerPass::vectorizeStores(
auto TryToVectorize = [&](const RelatedStoreInsts::DistToInstMap &StoreSeq) {
int64_t PrevDist = -1;
+ unsigned GlobalMaxVF = 0;
+ StoreChainContext::RangeSizesByIdx.assign(StoreSeq.size(), 1);
+ SmallVector<std::unique_ptr<StoreChainContext>> AllContexts;
BoUpSLP::ValueList Operands;
- // Collect the chain into a list.
+ SmallVector<StoreChainContext::SizePair> RangeSizes;
for (auto [Idx, Data] : enumerate(StoreSeq)) {
auto &[Dist, InstIdx] = Data;
if (Operands.empty() || Dist - PrevDist == 1) {
Operands.push_back(Stores[InstIdx]);
+ RangeSizes.emplace_back(Idx, 1);
PrevDist = Dist;
if (Idx != StoreSeq.size() - 1)
continue;
}
- llvm::scope_exit E([&, &Dist = Dist, &InstIdx = InstIdx]() {
- Operands.clear();
+
+ if (Operands.size() > 1 ||
+ Visited
+ .insert({Operands.front(),
+ cast<StoreInst>(Operands.front())->getValueOperand(),
+ Operands.back(),
+ cast<StoreInst>(Operands.back())->getValueOperand(),
+ Operands.size()})
+ .second) {
+ AllContexts.emplace_back(
+ std::make_unique<StoreChainContext>(*TTI, Operands, RangeSizes));
+ if (!AllContexts.back()->initializeContext(R, *DL))
+ AllContexts.pop_back();
+ else
+ GlobalMaxVF = std::max(GlobalMaxVF, AllContexts.back()->MaxVF);
+ }
+ Operands.clear();
+ RangeSizes.clear();
+ if (Idx != StoreSeq.size() - 1) {
Operands.push_back(Stores[InstIdx]);
+ RangeSizes.emplace_back(Idx, 1);
PrevDist = Dist;
- });
-
- if (Operands.size() <= 1 ||
- !Visited
- .insert({Operands.front(),
- cast<StoreInst>(Operands.front())->getValueOperand(),
- Operands.back(),
- cast<StoreInst>(Operands.back())->getValueOperand(),
- Operands.size()})
- .second)
- continue;
-
- unsigned MaxVecRegSize = R.getMaxVecRegSize();
- unsigned EltSize = R.getVectorElementSize(Operands[0]);
- unsigned MaxElts = llvm::bit_floor(MaxVecRegSize / EltSize);
-
- unsigned MaxVF =
- std::min(R.getMaximumVF(EltSize, Instruction::Store), MaxElts);
- auto *Store = cast<StoreInst>(Operands[0]);
- Type *StoreTy = Store->getValueOperand()->getType();
- Type *ValueTy = StoreTy;
- if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
- ValueTy = Trunc->getSrcTy();
- // When REVEC is enabled, StoreTy and ValueTy may be FixedVectorType. But
- // getStoreMinimumVF only support scalar type as arguments. As a result,
- // we need to use the element type of StoreTy and ValueTy to retrieve the
- // VF and then transform it back.
- // Remember: VF is defined as the number we want to vectorize, not the
- // number of elements in the final vector.
- Type *StoreScalarTy = StoreTy->getScalarType();
- unsigned MinVF = PowerOf2Ceil(TTI->getStoreMinimumVF(
- R.getMinVF(DL->getTypeStoreSizeInBits(StoreScalarTy)), StoreScalarTy,
- ValueTy->getScalarType()));
- MinVF /= getNumElements(StoreTy);
- MinVF = std::max<unsigned>(2, MinVF);
-
- if (MaxVF < MinVF) {
- LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
- << ") < "
- << "MinVF (" << MinVF << ")\n");
- continue;
}
+ }
- unsigned NonPowerOf2VF = 0;
- if (VectorizeNonPowerOf2) {
- // First try vectorizing with a non-power-of-2 VF. At the moment, only
- // consider cases where VF + 1 is a power-of-2, i.e. almost all vector
- // lanes are used.
- unsigned CandVF = std::clamp<unsigned>(Operands.size(), MinVF, MaxVF);
- if (has_single_bit(CandVF + 1)) {
- NonPowerOf2VF = CandVF;
- assert(NonPowerOf2VF != MaxVF &&
- "Non-power-of-2 VF should not be equal to MaxVF");
- }
- }
+ constexpr unsigned MaxAttempts = 4;
+ for (unsigned LimitVF = GlobalMaxVF; LimitVF > 0;
+ LimitVF = bit_ceil(LimitVF) / 2) {
+ for (const auto &CtxPtr : AllContexts) {
+ StoreChainContext &Context = *CtxPtr;
+ for (std::optional<unsigned> VFUnval = Context.getCurrentVF();
+ VFUnval && *VFUnval >= LimitVF; VFUnval = Context.getCurrentVF()) {
+ unsigned VF = *VFUnval;
- // MaxRegVF represents the number of instructions (scalar, or vector in
- // case of revec) that can be vectorized to naturally fit in a vector
- // register.
- unsigned MaxRegVF = MaxVF;
-
- MaxVF = std::min<unsigned>(MaxVF, bit_floor(Operands.size()));
- if (MaxVF < MinVF) {
- LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
- << ") < "
- << "MinVF (" << MinVF << ")\n");
- continue;
- }
-
- SmallVector<unsigned> CandidateVFs;
- for (unsigned VF = std::max(MaxVF, NonPowerOf2VF); VF >= MinVF;
- VF = divideCeil(VF, 2))
- CandidateVFs.push_back(VF);
-
- unsigned End = Operands.size();
- unsigned Repeat = 0;
- constexpr unsigned MaxAttempts = 4;
- // first: the best TreeSize from all prior loops over CandidateVFs, gets
- // updated after looping through CandidateVFs
- // second: the best TreeSize from all prior loops including the current
- // one
- llvm::SmallVector<std::pair<unsigned, unsigned>> RangeSizesStorage(
- Operands.size(), {1, 1});
- // The `slice` and `drop_front` interfaces are convenient
- const auto RangeSizes = MutableArrayRef(RangeSizesStorage);
- DenseMap<Value *, std::pair<unsigned, unsigned>> NonSchedulable;
- auto IsNotVectorized = [](const std::pair<unsigned, unsigned> &P) {
- return P.first > 0;
- };
- auto IsVectorized = [](const std::pair<unsigned, unsigned> &P) {
- return P.first == 0;
- };
- auto VFIsProfitable = [](unsigned Size,
- const std::pair<unsigned, unsigned> &P) {
- return Size >= P.first;
- };
- auto FirstSizeSame = [](unsigned Size,
- const std::pair<unsigned, unsigned> &P) {
- return Size == P.first;
- };
- while (true) {
- ++Repeat;
- bool RepeatChanged = false;
- bool AnyProfitableGraph = false;
- for (unsigned VF : CandidateVFs) {
- AnyProfitableGraph = false;
- unsigned FirstUnvecStore = std::distance(
- RangeSizes.begin(), find_if(RangeSizes, IsNotVectorized));
+ bool AnyProfitableGraph = false;
+ unsigned FirstUnvecStore = Context.getFirstUnvecStore();
// Form slices of size VF starting from FirstUnvecStore and try to
// vectorize them.
- while (FirstUnvecStore < End) {
- unsigned FirstVecStore = std::distance(
- RangeSizes.begin(),
- find_if(RangeSizes.drop_front(FirstUnvecStore), IsVectorized));
- unsigned MaxSliceEnd = FirstVecStore >= End ? End : FirstVecStore;
+ while (FirstUnvecStore < Context.End) {
+ unsigned FirstVecStore =
+ Context.getFirstVecStoreAfter(FirstUnvecStore);
+ unsigned MaxSliceEnd =
+ FirstVecStore >= Context.End ? Context.End : FirstVecStore;
for (unsigned SliceStartIdx = FirstUnvecStore;
SliceStartIdx + VF <= MaxSliceEnd;) {
- if (!checkTreeSizes(RangeSizes.slice(SliceStartIdx, VF))) {
+ if (!checkTreeSizes(
+ Context.RangeSizes.slice(SliceStartIdx, VF))) {
++SliceStartIdx;
continue;
}
ArrayRef<Value *> Slice =
- ArrayRef(Operands).slice(SliceStartIdx, VF);
+ ArrayRef(Context.Operands).slice(SliceStartIdx, VF);
assert(all_of(Slice,
[&](Value *V) {
return cast<StoreInst>(V)
@@ -24843,9 +25043,9 @@ bool SLPVectorizerPass::vectorizeStores(
->getType();
}) &&
"Expected all operands of same type.");
- if (!NonSchedulable.empty()) {
+ if (!Context.NonSchedulable.empty()) {
auto [NonSchedSizeMax, NonSchedSizeMin] =
- NonSchedulable.lookup(Slice.front());
+ Context.NonSchedulable.lookup(Slice.front());
if (NonSchedSizeMax > 0 && NonSchedSizeMin <= VF) {
// VF is too ambitious. Try to vectorize another slice before
// trying a smaller VF.
@@ -24854,12 +25054,12 @@ bool SLPVectorizerPass::vectorizeStores(
}
}
unsigned TreeSize;
- std::optional<bool> Res =
- vectorizeStoreChain(Slice, R, SliceStartIdx, MinVF, TreeSize);
+ std::optional<bool> Res = vectorizeStoreChain(
+ Slice, R, SliceStartIdx, Context.MinVF, TreeSize);
if (!Res) {
// Update the range of non schedulable VFs for slices starting
// at SliceStartIdx.
- NonSchedulable
+ Context.NonSchedulable
.try_emplace(Slice.front(), std::make_pair(VF, VF))
.first->getSecond()
.second = VF;
@@ -24867,101 +25067,74 @@ bool SLPVectorizerPass::vectorizeStores(
// Mark the vectorized stores so that we don't vectorize them
// again.
VectorizedStores.insert_range(Slice);
- AnyProfitableGraph = RepeatChanged = Changed = true;
+ AnyProfitableGraph = Context.RepeatChanged = Changed = true;
// If we vectorized initial block, no need to try to vectorize
// it again.
- for (std::pair<unsigned, unsigned> &P :
- RangeSizes.slice(SliceStartIdx, VF))
- P.first = P.second = 0;
- if (SliceStartIdx < FirstUnvecStore + MinVF) {
- for (std::pair<unsigned, unsigned> &P : RangeSizes.slice(
- FirstUnvecStore, SliceStartIdx - FirstUnvecStore))
- P.first = P.second = 0;
- FirstUnvecStore = SliceStartIdx + VF;
- }
- if (SliceStartIdx > MaxSliceEnd - VF - MinVF) {
- for (std::pair<unsigned, unsigned> &P :
- RangeSizes.slice(SliceStartIdx + VF,
- MaxSliceEnd - (SliceStartIdx + VF)))
- P.first = P.second = 0;
- if (MaxSliceEnd == End)
- End = SliceStartIdx;
- MaxSliceEnd = SliceStartIdx;
- }
+ Context.markRangeVectorized(SliceStartIdx, VF, FirstUnvecStore,
+ MaxSliceEnd);
SliceStartIdx += VF;
continue;
}
if (VF > 2 && Res &&
- !all_of(RangeSizes.slice(SliceStartIdx, VF),
- std::bind(VFIsProfitable, TreeSize, _1))) {
+ !Context.allOfRangeProfitable(SliceStartIdx, VF, TreeSize)) {
SliceStartIdx += VF;
continue;
}
// Check for the very big VFs that we're not rebuilding same
// trees, just with larger number of elements.
- if (VF > MaxRegVF && TreeSize > 1 &&
- all_of(RangeSizes.slice(SliceStartIdx, VF),
- std::bind(FirstSizeSame, TreeSize, _1))) {
+ if (VF > Context.MaxRegVF && TreeSize > 1 &&
+ Context.isFirstSizeSameRange(SliceStartIdx, VF, TreeSize)) {
SliceStartIdx += VF;
while (SliceStartIdx != MaxSliceEnd &&
- RangeSizes[SliceStartIdx].first == TreeSize)
+ Context.RangeSizes[SliceStartIdx].first == TreeSize)
++SliceStartIdx;
continue;
}
if (TreeSize > 1)
- for (std::pair<unsigned, unsigned> &P :
- RangeSizes.slice(SliceStartIdx, VF))
- P.second = std::max(P.second, TreeSize);
+ Context.updateCachedRangeSizes(SliceStartIdx, VF, TreeSize);
++SliceStartIdx;
AnyProfitableGraph = true;
}
- if (FirstUnvecStore >= End)
+ if (FirstUnvecStore >= Context.End)
break;
if (MaxSliceEnd - FirstUnvecStore < VF &&
- MaxSliceEnd - FirstUnvecStore >= MinVF)
+ MaxSliceEnd - FirstUnvecStore >= Context.MinVF)
AnyProfitableGraph = true;
- FirstUnvecStore = std::distance(
- RangeSizes.begin(),
- find_if(RangeSizes.drop_front(MaxSliceEnd), IsNotVectorized));
+ FirstUnvecStore = Context.getFirstUnvecStore(MaxSliceEnd);
}
- if (!AnyProfitableGraph && VF >= MaxRegVF && has_single_bit(VF))
+ if (!AnyProfitableGraph && VF >= Context.MaxRegVF &&
+ has_single_bit(VF))
break;
// For the MaxRegVF case, save RangeSizes to limit compile time
- if (VF == MaxRegVF)
- for (std::pair<unsigned, unsigned> &P : RangeSizes)
- if (P.first != 0)
- P.first = std::max(P.second, P.first);
- }
- // All values vectorized - exit.
- if (all_of(RangeSizes, IsVectorized))
- break;
- // Check if tried all attempts or no need for the last attempts at all.
- if (Repeat >= MaxAttempts ||
- (Repeat > 1 && (RepeatChanged || !AnyProfitableGraph)))
- break;
- constexpr unsigned StoresLimit = 64;
- const unsigned MaxTotalNum = std::min<unsigned>(
- Operands.size(),
- static_cast<unsigned>(
- End - std::distance(RangeSizes.begin(),
- find_if(RangeSizes, IsNotVectorized))));
- unsigned VF = bit_ceil(CandidateVFs.front()) * 2;
- if (VF > MaxTotalNum || VF >= StoresLimit)
- break;
- for (std::pair<unsigned, unsigned> &P : RangeSizes) {
- if (P.first != 0)
- P.first = std::max(P.second, P.first);
+ if (VF == Context.MaxRegVF)
+ Context.updateRangeSizesFromCache();
+
+ Context.incrementVF();
+ if (!Context.getCurrentVF()) {
+ // All values vectorized - exit.
+ if (Context.allVectorized()) {
+ Context.Done = true;
+ break;
+ }
+ // Check if tried all attempts or no need for the last attempts at
+ // all.
+ if (Context.Repeat >= MaxAttempts ||
+ (Context.Repeat > 1 &&
+ (Context.RepeatChanged || !AnyProfitableGraph))) {
+ Context.Done = true;
+ break;
+ }
+
+ if (!Context.updateCandidateVFs()) {
+ Context.Done = true;
+ break;
+ }
+ Context.updateRangeSizesFromCache();
+ }
}
- // Attempt again to vectorize even larger chains if all previous
- // attempts were unsuccessful because of the cost issues.
- CandidateVFs.clear();
- unsigned Limit =
- getFloorFullVectorNumberOfElements(*TTI, StoreTy, MaxTotalNum);
- if (bit_floor(Limit) == VF && Limit != VF)
- CandidateVFs.push_back(Limit);
- CandidateVFs.push_back(VF);
}
}
+ StoreChainContext::RangeSizesByIdx.clear();
};
/// Groups of stores to vectorize
>From a51e8f9772f7b07070a694c5150dad28647e6aac Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 11:54:48 -0700
Subject: [PATCH 04/16] Fix comment typos
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 336e1bdebf76f..7f5c3da09ecd0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24572,8 +24572,8 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
namespace {
/// A group of related stores which we are in the process of vectorizing,
-/// a subset of which may already vectorized. Stores context information
-/// about the group as a whole as well as information about what VF's need
+/// a subset of which may already be vectorized. Stores context information
+/// about the group as a whole as well as information about what VFs need
/// to be attempted still.
struct StoreChainContext {
using SizePair = std::pair<unsigned, unsigned>;
>From 1dd64f5ad19ef988916b69fb39b41e1d667f68be Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 11:58:59 -0700
Subject: [PATCH 05/16] Make a RangeSizesByIdx array for each TryToVectorize()
call
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 60 +++++++++++--------
1 file changed, 35 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7f5c3da09ecd0..6bc47210e6c0a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24622,7 +24622,7 @@ struct StoreChainContext {
/// RangeSize information for all elements in any chain
/// Needed since may be overlap between chains
- inline static SmallVector<unsigned> RangeSizesByIdx;
+ SmallVector<unsigned> &RangeSizesByIdx;
/// Element has not been vectorized, but due to the elements around it being
/// vectorized, it does not have enough neighboring elements to make a chain
/// longer than MinVF as part of the current Context
@@ -24631,24 +24631,26 @@ struct StoreChainContext {
explicit StoreChainContext(const TargetTransformInfo &TTI,
ArrayRef<Value *> Ops,
- ArrayRef<SizePair> RangeSizes)
- : TTI(TTI), Operands(Ops), RangeSizesStorage(RangeSizes) {}
+ ArrayRef<SizePair> RangeSizes,
+ SmallVector<unsigned> &RangeSizesByIdx)
+ : TTI(TTI), Operands(Ops), RangeSizesStorage(RangeSizes),
+ RangeSizesByIdx(RangeSizesByIdx) {}
- static bool isNotVectorized(const SizePair &P) {
+ bool isNotVectorized(const SizePair &P) const {
return P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] > 0;
}
- static bool isVectorized(const SizePair &P) {
+ bool isVectorized(const SizePair &P) const {
return P.first == LocallyUnvectorizable || RangeSizesByIdx[P.first] == 0;
}
- static bool vfIsProfitable(unsigned Size, const SizePair &P) {
+ bool vfIsProfitable(unsigned Size, const SizePair &P) const {
assert(P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] &&
"Cannot check profitability of vectorized element");
return Size >= RangeSizesByIdx[P.first];
}
- static bool firstSizeSame(unsigned Size, const SizePair &P) {
+ bool firstSizeSame(unsigned Size, const SizePair &P) const {
assert(P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] &&
"Cannot check profitability of vectorized element");
return Size == RangeSizesByIdx[P.first];
@@ -24658,33 +24660,41 @@ struct StoreChainContext {
unsigned getFirstUnvecStore(unsigned StartIdx = 0) const {
return std::distance(
RangeSizes.begin(),
- find_if(RangeSizes.drop_front(StartIdx), isNotVectorized));
+ find_if(RangeSizes.drop_front(StartIdx), [this](const SizePair &P) {
+ return this->isNotVectorized(P);
+ }));
}
// Return the index of the first vectorized store after \p StartIdx
unsigned getFirstVecStoreAfter(unsigned StartIdx) const {
return std::distance(
RangeSizes.begin(),
- find_if(RangeSizes.drop_front(StartIdx), isVectorized));
+ find_if(RangeSizes.drop_front(StartIdx),
+ [this](const SizePair &P) { return this->isVectorized(P); }));
}
// Return true if all stores have been vectorized
- bool allVectorized() const { return all_of(RangeSizes, isVectorized); }
+ bool allVectorized() const {
+ return all_of(RangeSizes,
+ [this](const SizePair &P) { return this->isVectorized(P); });
+ }
// Return true if all elements in the given range match \p TreeSize
bool isFirstSizeSameRange(unsigned StartIdx, unsigned Length,
unsigned TreeSize) const {
- return all_of(
- RangeSizes.slice(StartIdx, Length),
- [TreeSize](const SizePair &P) { return firstSizeSame(TreeSize, P); });
+ return all_of(RangeSizes.slice(StartIdx, Length),
+ [TreeSize, this](const SizePair &P) {
+ return firstSizeSame(TreeSize, P);
+ });
}
// Return true if the \p TreeSize is profitable for all elements in the range
bool allOfRangeProfitable(unsigned StartIdx, unsigned Length,
unsigned TreeSize) const {
- return all_of(
- RangeSizes.slice(StartIdx, Length),
- [TreeSize](const SizePair &P) { return vfIsProfitable(TreeSize, P); });
+ return all_of(RangeSizes.slice(StartIdx, Length),
+ [TreeSize, this](const SizePair &P) {
+ return vfIsProfitable(TreeSize, P);
+ });
}
// Update the live (first) range sizes from the cached values (second)
@@ -24837,14 +24847,15 @@ bool StoreChainContext::initializeContext(BoUpSLP &R, const DataLayout &DL) {
} // namespace
/// Checks if the quadratic mean deviation is less than 90% of the mean size.
-static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes) {
+static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes,
+ const SmallVector<unsigned> &RangeSizesByIdx) {
unsigned Num = 0;
uint64_t Sum = std::accumulate(
Sizes.begin(), Sizes.end(), static_cast<uint64_t>(0),
[&](uint64_t V, const std::pair<unsigned, unsigned> &Val) {
unsigned Size = Val.first == StoreChainContext::LocallyUnvectorizable
? 0
- : StoreChainContext::RangeSizesByIdx[Val.first];
+ : RangeSizesByIdx[Val.first];
if (Size == 1)
return V;
++Num;
@@ -24861,7 +24872,7 @@ static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes) {
unsigned P =
Val.first == StoreChainContext::LocallyUnvectorizable
? 0
- : StoreChainContext::RangeSizesByIdx[Val.first];
+ : RangeSizesByIdx[Val.first];
if (P == 1)
return V;
return V + (P - Mean) * (P - Mean);
@@ -24967,7 +24978,7 @@ bool SLPVectorizerPass::vectorizeStores(
auto TryToVectorize = [&](const RelatedStoreInsts::DistToInstMap &StoreSeq) {
int64_t PrevDist = -1;
unsigned GlobalMaxVF = 0;
- StoreChainContext::RangeSizesByIdx.assign(StoreSeq.size(), 1);
+ SmallVector<unsigned> RangeSizesByIdx(StoreSeq.size(), 1);
SmallVector<std::unique_ptr<StoreChainContext>> AllContexts;
BoUpSLP::ValueList Operands;
SmallVector<StoreChainContext::SizePair> RangeSizes;
@@ -24989,8 +25000,8 @@ bool SLPVectorizerPass::vectorizeStores(
cast<StoreInst>(Operands.back())->getValueOperand(),
Operands.size()})
.second) {
- AllContexts.emplace_back(
- std::make_unique<StoreChainContext>(*TTI, Operands, RangeSizes));
+ AllContexts.emplace_back(std::make_unique<StoreChainContext>(
+ *TTI, Operands, RangeSizes, RangeSizesByIdx));
if (!AllContexts.back()->initializeContext(R, *DL))
AllContexts.pop_back();
else
@@ -25026,8 +25037,8 @@ bool SLPVectorizerPass::vectorizeStores(
FirstVecStore >= Context.End ? Context.End : FirstVecStore;
for (unsigned SliceStartIdx = FirstUnvecStore;
SliceStartIdx + VF <= MaxSliceEnd;) {
- if (!checkTreeSizes(
- Context.RangeSizes.slice(SliceStartIdx, VF))) {
+ if (!checkTreeSizes(Context.RangeSizes.slice(SliceStartIdx, VF),
+ RangeSizesByIdx)) {
++SliceStartIdx;
continue;
}
@@ -25134,7 +25145,6 @@ bool SLPVectorizerPass::vectorizeStores(
}
}
}
- StoreChainContext::RangeSizesByIdx.clear();
};
/// Groups of stores to vectorize
>From b57477155400f2d2aadf8de521683c49d7a47fdd Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 12:08:14 -0700
Subject: [PATCH 06/16] Fix logic error
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 6bc47210e6c0a..1760c0785dda8 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24992,7 +24992,7 @@ bool SLPVectorizerPass::vectorizeStores(
continue;
}
- if (Operands.size() > 1 ||
+ if (Operands.size() > 1 &&
Visited
.insert({Operands.front(),
cast<StoreInst>(Operands.front())->getValueOperand(),
>From 4c1154ba1b241e28ee492e75f3b1910a63ed61e9 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 12:44:55 -0700
Subject: [PATCH 07/16] Pass around TTI rather than storing it as part of
StoreChainContext
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 21 +++++++++----------
1 file changed, 10 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1760c0785dda8..39994b63ce595 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24579,8 +24579,6 @@ struct StoreChainContext {
using SizePair = std::pair<unsigned, unsigned>;
using SizePairArrayRef = MutableArrayRef<SizePair>;
- const TargetTransformInfo &TTI;
-
/// For the StoreTy/Stride in the given group, what is the smallest VF
/// that can be used
unsigned MinVF = 0;
@@ -24629,11 +24627,10 @@ struct StoreChainContext {
inline static const unsigned LocallyUnvectorizable =
std::numeric_limits<unsigned>::max();
- explicit StoreChainContext(const TargetTransformInfo &TTI,
- ArrayRef<Value *> Ops,
+ explicit StoreChainContext(ArrayRef<Value *> Ops,
ArrayRef<SizePair> RangeSizes,
SmallVector<unsigned> &RangeSizesByIdx)
- : TTI(TTI), Operands(Ops), RangeSizesStorage(RangeSizes),
+ : Operands(Ops), RangeSizesStorage(RangeSizes),
RangeSizesByIdx(RangeSizesByIdx) {}
bool isNotVectorized(const SizePair &P) const {
@@ -24713,7 +24710,7 @@ struct StoreChainContext {
}
// Update CandidateVFs for secondary iterations
- bool updateCandidateVFs() {
+ bool updateCandidateVFs(const TargetTransformInfo &TTI) {
assert(CandidateVFs.empty() && "Did not use all VFs before refilling");
constexpr unsigned StoresLimit = 64;
const unsigned MaxTotalNum = std::min<unsigned>(
@@ -24745,7 +24742,8 @@ struct StoreChainContext {
void incrementVF() { CandidateVFs.pop(); }
// Set up initial values using the already set Operands
- bool initializeContext(BoUpSLP &R, const DataLayout &DL);
+ bool initializeContext(BoUpSLP &R, const DataLayout &DL,
+ const TargetTransformInfo &TTI);
// Record vectorization of the provided range
void markRangeVectorized(unsigned StartIdx, unsigned Length,
@@ -24777,7 +24775,8 @@ void StoreChainContext::markRangeVectorized(unsigned StartIdx, unsigned Length,
}
}
-bool StoreChainContext::initializeContext(BoUpSLP &R, const DataLayout &DL) {
+bool StoreChainContext::initializeContext(BoUpSLP &R, const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
// Initialize range tracking in context.
RangeSizes = MutableArrayRef(RangeSizesStorage);
@@ -25001,8 +25000,8 @@ bool SLPVectorizerPass::vectorizeStores(
Operands.size()})
.second) {
AllContexts.emplace_back(std::make_unique<StoreChainContext>(
- *TTI, Operands, RangeSizes, RangeSizesByIdx));
- if (!AllContexts.back()->initializeContext(R, *DL))
+ Operands, RangeSizes, RangeSizesByIdx));
+ if (!AllContexts.back()->initializeContext(R, *DL, *TTI))
AllContexts.pop_back();
else
GlobalMaxVF = std::max(GlobalMaxVF, AllContexts.back()->MaxVF);
@@ -25136,7 +25135,7 @@ bool SLPVectorizerPass::vectorizeStores(
break;
}
- if (!Context.updateCandidateVFs()) {
+ if (!Context.updateCandidateVFs(*TTI)) {
Context.Done = true;
break;
}
>From ff033b0be6ace8912c330885a25637309d08983e Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 12:46:02 -0700
Subject: [PATCH 08/16] Fix comment
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 39994b63ce595..dca1985dd5534 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24609,8 +24609,9 @@ struct StoreChainContext {
std::queue<unsigned> CandidateVFs;
/// Track the TreeSizes of prior vectorization attempts using each element,
/// to help us find early exit cases
- /// .first contains pointer into RangeSizesByIdx to help us track
+ /// - first: contains pointer into RangeSizesByIdx to help us track
/// vectorization of elements that belong to multiple chains
+ /// - second: contains cached TreeSize value for that element
SmallVector<SizePair> RangeSizesStorage;
SizePairArrayRef RangeSizes;
/// Store information about failed vectorization attempts due to scheduling
>From a0d84fb2b873155c6b5b31d71e9a89be74d69870 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 12:58:34 -0700
Subject: [PATCH 09/16] [SLP] Update DenseMap to be of type SmallDenseMap
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index dca1985dd5534..b07833f331596 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24615,7 +24615,7 @@ struct StoreChainContext {
SmallVector<SizePair> RangeSizesStorage;
SizePairArrayRef RangeSizes;
/// Store information about failed vectorization attempts due to scheduling
- DenseMap<Value *, SizePair> NonSchedulable;
+ SmallDenseMap<Value *, SizePair> NonSchedulable;
/// Type of the Stores in `Operands`
Type *StoreTy = nullptr;
>From 7d513d3227d28e8102a5f767b1a10faf53c7aa50 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 14:06:44 -0700
Subject: [PATCH 10/16] [SLP] Remove excessive 'using' statement
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b07833f331596..16864171dc48a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24577,7 +24577,6 @@ namespace {
/// to be attempted still.
struct StoreChainContext {
using SizePair = std::pair<unsigned, unsigned>;
- using SizePairArrayRef = MutableArrayRef<SizePair>;
/// For the StoreTy/Stride in the given group, what is the smallest VF
/// that can be used
@@ -24613,7 +24612,7 @@ struct StoreChainContext {
/// vectorization of elements that belong to multiple chains
/// - second: contains cached TreeSize value for that element
SmallVector<SizePair> RangeSizesStorage;
- SizePairArrayRef RangeSizes;
+ MutableArrayRef<SizePair> RangeSizes;
/// Store information about failed vectorization attempts due to scheduling
SmallDenseMap<Value *, SizePair> NonSchedulable;
/// Type of the Stores in `Operands`
>From 3bb29e37907c9d1628485d3b875e4a8f27f16942 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 14:48:44 -0700
Subject: [PATCH 11/16] Remove excessive qualifications
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 16864171dc48a..3fc0b15550577 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24753,10 +24753,10 @@ struct StoreChainContext {
void StoreChainContext::markRangeVectorized(unsigned StartIdx, unsigned Length,
unsigned &FirstUnvecStore,
unsigned &MaxSliceEnd) {
- for (StoreChainContext::SizePair &P : RangeSizes.slice(StartIdx, Length))
+ for (SizePair &P : RangeSizes.slice(StartIdx, Length))
RangeSizesByIdx[P.first] = P.second = 0;
if (StartIdx < FirstUnvecStore + MinVF) {
- for (StoreChainContext::SizePair &P :
+ for (SizePair &P :
RangeSizes.slice(FirstUnvecStore, StartIdx - FirstUnvecStore)) {
P.first = LocallyUnvectorizable;
P.second = 0;
@@ -24764,7 +24764,7 @@ void StoreChainContext::markRangeVectorized(unsigned StartIdx, unsigned Length,
FirstUnvecStore = StartIdx + Length;
}
if (StartIdx + Length > MaxSliceEnd - MinVF) {
- for (StoreChainContext::SizePair &P : RangeSizes.slice(
+ for (SizePair &P : RangeSizes.slice(
StartIdx + Length, MaxSliceEnd - (StartIdx + Length))) {
P.first = LocallyUnvectorizable;
P.second = 0;
>From a93809507fbfb7ef120888d0942ca502d9d4d0a4 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 15:40:05 -0700
Subject: [PATCH 12/16] Reorganize to make StoreChainContext a class
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 256 ++++++++++--------
1 file changed, 138 insertions(+), 118 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3fc0b15550577..0f0b63ee2d0db 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24575,17 +24575,22 @@ namespace {
/// a subset of which may already be vectorized. Stores context information
/// about the group as a whole as well as information about what VFs need
/// to be attempted still.
-struct StoreChainContext {
+class StoreChainContext {
+public:
using SizePair = std::pair<unsigned, unsigned>;
+ /// In RangeSizes, element has not been vectorized, but due to the elements
+ /// around it being vectorized, it does not have enough neighboring elements
+ /// to make a chain longer than MinVF as part of the current Context
+ inline static const unsigned LocallyUnvectorizable =
+ std::numeric_limits<unsigned>::max();
+
/// For the StoreTy/Stride in the given group, what is the smallest VF
/// that can be used
unsigned MinVF = 0;
/// Maximum number of instructions that can be vectorized, either
/// constrained by register width or operands size.
unsigned MaxVF = 0;
- /// The largest VF checked in the current Repeat
- unsigned ProbeVF = 0;
/// MaxRegVF represents the number of instructions (scalar, or vector in
/// case of revec) that can be vectorized to naturally fit in a vector
/// register.
@@ -24601,31 +24606,11 @@ struct StoreChainContext {
/// What element index is the end of the to be vectorized Operands
/// i.e. Operands.size() == 16, and 12-15 were vectorized, then End == 12
unsigned End = 0;
-
/// Stores that compose this chain
BoUpSLP::ValueList Operands;
- /// Which VFs do we want to attempt for this chain
- std::queue<unsigned> CandidateVFs;
- /// Track the TreeSizes of prior vectorization attempts using each element,
- /// to help us find early exit cases
- /// - first: contains pointer into RangeSizesByIdx to help us track
- /// vectorization of elements that belong to multiple chains
- /// - second: contains cached TreeSize value for that element
- SmallVector<SizePair> RangeSizesStorage;
MutableArrayRef<SizePair> RangeSizes;
/// Store information about failed vectorization attempts due to scheduling
SmallDenseMap<Value *, SizePair> NonSchedulable;
- /// Type of the Stores in `Operands`
- Type *StoreTy = nullptr;
-
- /// RangeSize information for all elements in any chain
- /// Needed since may be overlap between chains
- SmallVector<unsigned> &RangeSizesByIdx;
- /// Element has not been vectorized, but due to the elements around it being
- /// vectorized, it does not have enough neighboring elements to make a chain
- /// longer than MinVF as part of the current Context
- inline static const unsigned LocallyUnvectorizable =
- std::numeric_limits<unsigned>::max();
explicit StoreChainContext(ArrayRef<Value *> Ops,
ArrayRef<SizePair> RangeSizes,
@@ -24633,6 +24618,37 @@ struct StoreChainContext {
: Operands(Ops), RangeSizesStorage(RangeSizes),
RangeSizesByIdx(RangeSizesByIdx) {}
+ // Set up initial values using the already set Operands
+ bool initializeContext(BoUpSLP &R, const DataLayout &DL,
+ const TargetTransformInfo &TTI);
+ // Return the index of the first unvectorized store after \p StartIdx
+ unsigned getFirstUnvecStore(unsigned StartIdx = 0) const;
+ // Return the index of the first vectorized store after \p StartIdx
+ unsigned getFirstVecStoreAfter(unsigned StartIdx) const;
+ // Return true if all stores have been vectorized
+ bool allVectorized() const;
+ // Return true if all elements in the given range match \p TreeSize
+ bool isFirstSizeSameRange(unsigned StartIdx, unsigned Length,
+ unsigned TreeSize) const;
+ // Return true if the \p TreeSize is profitable for all elements in the range
+ bool allOfRangeProfitable(unsigned StartIdx, unsigned Length,
+ unsigned TreeSize) const;
+ // Update the live (first) range sizes from the cached values (second)
+ void updateRangeSizesFromCache();
+ // Update the cached (second) range sizes with the given \p TreeSize
+ void updateCachedRangeSizes(unsigned StartIdx, unsigned Length,
+ unsigned TreeSize);
+ // Update CandidateVFs for secondary iterations
+ bool updateCandidateVFs(const TargetTransformInfo &TTI);
+ // Get the current VF
+ std::optional<unsigned> getCurrentVF() const;
+ // Remove the current VF from the queue
+ void incrementVF() { CandidateVFs.pop(); }
+ // Record vectorization of the provided range
+ void markRangeVectorized(unsigned StartIdx, unsigned Length,
+ unsigned &FirstUnvecStore, unsigned &MaxSliceEnd);
+
+private:
bool isNotVectorized(const SizePair &P) const {
return P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] > 0;
}
@@ -24653,101 +24669,21 @@ struct StoreChainContext {
return Size == RangeSizesByIdx[P.first];
}
- // Return the index of the first unvectorized store after \p StartIdx
- unsigned getFirstUnvecStore(unsigned StartIdx = 0) const {
- return std::distance(
- RangeSizes.begin(),
- find_if(RangeSizes.drop_front(StartIdx), [this](const SizePair &P) {
- return this->isNotVectorized(P);
- }));
- }
-
- // Return the index of the first vectorized store after \p StartIdx
- unsigned getFirstVecStoreAfter(unsigned StartIdx) const {
- return std::distance(
- RangeSizes.begin(),
- find_if(RangeSizes.drop_front(StartIdx),
- [this](const SizePair &P) { return this->isVectorized(P); }));
- }
-
- // Return true if all stores have been vectorized
- bool allVectorized() const {
- return all_of(RangeSizes,
- [this](const SizePair &P) { return this->isVectorized(P); });
- }
-
- // Return true if all elements in the given range match \p TreeSize
- bool isFirstSizeSameRange(unsigned StartIdx, unsigned Length,
- unsigned TreeSize) const {
- return all_of(RangeSizes.slice(StartIdx, Length),
- [TreeSize, this](const SizePair &P) {
- return firstSizeSame(TreeSize, P);
- });
- }
-
- // Return true if the \p TreeSize is profitable for all elements in the range
- bool allOfRangeProfitable(unsigned StartIdx, unsigned Length,
- unsigned TreeSize) const {
- return all_of(RangeSizes.slice(StartIdx, Length),
- [TreeSize, this](const SizePair &P) {
- return vfIsProfitable(TreeSize, P);
- });
- }
-
- // Update the live (first) range sizes from the cached values (second)
- void updateRangeSizesFromCache() {
- for (SizePair &P : RangeSizes) {
- if (P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] != 0)
- RangeSizesByIdx[P.first] = std::max(P.second, RangeSizesByIdx[P.first]);
- }
- }
-
- // Update the cached (second) range sizes with the given \p TreeSize
- void updateCachedRangeSizes(unsigned StartIdx, unsigned Length,
- unsigned TreeSize) {
- for (SizePair &P : RangeSizes.slice(StartIdx, Length))
- P.second = std::max(P.second, TreeSize);
- }
-
- // Update CandidateVFs for secondary iterations
- bool updateCandidateVFs(const TargetTransformInfo &TTI) {
- assert(CandidateVFs.empty() && "Did not use all VFs before refilling");
- constexpr unsigned StoresLimit = 64;
- const unsigned MaxTotalNum = std::min<unsigned>(
- Operands.size(), static_cast<unsigned>(End - getFirstUnvecStore()));
- unsigned VF = bit_ceil(ProbeVF) * 2;
- if (VF > MaxTotalNum || VF >= StoresLimit)
- return false;
- // Attempt again to vectorize even larger chains if all previous
- // attempts were unsuccessful because of the cost issues.
- unsigned Limit =
- getFloorFullVectorNumberOfElements(TTI, StoreTy, MaxTotalNum);
- if (bit_floor(Limit) == VF && Limit != VF)
- CandidateVFs.push(Limit);
- CandidateVFs.push(VF);
- ProbeVF = CandidateVFs.front();
- ++Repeat;
- RepeatChanged = false;
- return true;
- }
-
- // Get the current VF
- std::optional<unsigned> getCurrentVF() const {
- if (Done || CandidateVFs.empty())
- return std::nullopt;
- return CandidateVFs.front();
- }
-
- // Increment the VF-Index counter, return false if at end of CandidateVFs
- void incrementVF() { CandidateVFs.pop(); }
-
- // Set up initial values using the already set Operands
- bool initializeContext(BoUpSLP &R, const DataLayout &DL,
- const TargetTransformInfo &TTI);
-
- // Record vectorization of the provided range
- void markRangeVectorized(unsigned StartIdx, unsigned Length,
- unsigned &FirstUnvecStore, unsigned &MaxSliceEnd);
+ /// The largest VF checked in the current Repeat
+ unsigned ProbeVF = 0;
+ /// Type of the Stores in `Operands`
+ Type *StoreTy = nullptr;
+ /// Which VFs do we want to attempt for this chain
+ std::queue<unsigned> CandidateVFs;
+ /// Track the TreeSizes of prior vectorization attempts using each element,
+ /// to help us find early exit cases
+ /// - first: contains pointer into RangeSizesByIdx to help us track
+ /// vectorization of elements that belong to multiple chains
+ /// - second: contains cached TreeSize value for that element
+ SmallVector<SizePair> RangeSizesStorage;
+ /// RangeSize information for all elements in any chain
+ /// Needed since may be overlap between chains
+ SmallVector<unsigned> &RangeSizesByIdx;
};
void StoreChainContext::markRangeVectorized(unsigned StartIdx, unsigned Length,
@@ -24843,6 +24779,90 @@ bool StoreChainContext::initializeContext(BoUpSLP &R, const DataLayout &DL,
ProbeVF = MaxVF;
return true;
}
+
+// Return the index of the first unvectorized store after \p StartIdx
+unsigned StoreChainContext::getFirstUnvecStore(unsigned StartIdx) const {
+ return std::distance(
+ RangeSizes.begin(),
+ find_if(RangeSizes.drop_front(StartIdx),
+ [this](const SizePair &P) { return this->isNotVectorized(P); }));
+}
+
+// Return the index of the first vectorized store after \p StartIdx
+unsigned StoreChainContext::getFirstVecStoreAfter(unsigned StartIdx) const {
+ return std::distance(
+ RangeSizes.begin(),
+ find_if(RangeSizes.drop_front(StartIdx),
+ [this](const SizePair &P) { return this->isVectorized(P); }));
+}
+
+// Return true if all stores have been vectorized
+bool StoreChainContext::allVectorized() const {
+ return all_of(RangeSizes,
+ [this](const SizePair &P) { return this->isVectorized(P); });
+}
+
+// Return true if all elements in the given range match \p TreeSize
+bool StoreChainContext::isFirstSizeSameRange(unsigned StartIdx, unsigned Length,
+ unsigned TreeSize) const {
+ return all_of(RangeSizes.slice(StartIdx, Length),
+ [TreeSize, this](const SizePair &P) {
+ return firstSizeSame(TreeSize, P);
+ });
+}
+
+// Return true if the \p TreeSize is profitable for all elements in the range
+bool StoreChainContext::allOfRangeProfitable(unsigned StartIdx, unsigned Length,
+ unsigned TreeSize) const {
+ return all_of(RangeSizes.slice(StartIdx, Length),
+ [TreeSize, this](const SizePair &P) {
+ return vfIsProfitable(TreeSize, P);
+ });
+}
+
+// Update the live (first) range sizes from the cached values (second)
+void StoreChainContext::updateRangeSizesFromCache() {
+ for (SizePair &P : RangeSizes) {
+ if (P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] != 0)
+ RangeSizesByIdx[P.first] = std::max(P.second, RangeSizesByIdx[P.first]);
+ }
+}
+
+// Update the cached (second) range sizes with the given \p TreeSize
+void StoreChainContext::updateCachedRangeSizes(unsigned StartIdx,
+ unsigned Length,
+ unsigned TreeSize) {
+ for (SizePair &P : RangeSizes.slice(StartIdx, Length))
+ P.second = std::max(P.second, TreeSize);
+}
+
+bool StoreChainContext::updateCandidateVFs(const TargetTransformInfo &TTI) {
+ assert(CandidateVFs.empty() && "Did not use all VFs before refilling");
+ constexpr unsigned StoresLimit = 64;
+ const unsigned MaxTotalNum = std::min<unsigned>(
+ Operands.size(), static_cast<unsigned>(End - getFirstUnvecStore()));
+ unsigned VF = bit_ceil(ProbeVF) * 2;
+ if (VF > MaxTotalNum || VF >= StoresLimit)
+ return false;
+ // Attempt again to vectorize even larger chains if all previous
+ // attempts were unsuccessful because of the cost issues.
+ unsigned Limit =
+ getFloorFullVectorNumberOfElements(TTI, StoreTy, MaxTotalNum);
+ if (bit_floor(Limit) == VF && Limit != VF)
+ CandidateVFs.push(Limit);
+ CandidateVFs.push(VF);
+ ProbeVF = CandidateVFs.front();
+ ++Repeat;
+ RepeatChanged = false;
+ return true;
+}
+
+// Get the current VF
+std::optional<unsigned> StoreChainContext::getCurrentVF() const {
+ if (Done || CandidateVFs.empty())
+ return std::nullopt;
+ return CandidateVFs.front();
+}
} // namespace
/// Checks if the quadratic mean deviation is less than 90% of the mean size.
>From 43790c74428b1813381aa9df415e4c092bf59675 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 15:46:19 -0700
Subject: [PATCH 13/16] Refactor to remove StoreChainContext::Done
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0f0b63ee2d0db..03492a4e5c47e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24600,9 +24600,6 @@ class StoreChainContext {
unsigned Repeat = 1;
/// Did any vectorization occur for the current iteration over CandidateVFs
bool RepeatChanged = false;
- /// Are we finished checking this StoreChainContext? Can be due to all VFs
- /// being checked, or an early exit condition
- bool Done = false;
/// What element index is the end of the to be vectorized Operands
/// i.e. Operands.size() == 16, and 12-15 were vectorized, then End == 12
unsigned End = 0;
@@ -24743,7 +24740,6 @@ bool StoreChainContext::initializeContext(BoUpSLP &R, const DataLayout &DL,
LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
<< ") < "
<< "MinVF (" << MinVF << ")\n");
- Done = true;
return false;
}
@@ -24767,7 +24763,6 @@ bool StoreChainContext::initializeContext(BoUpSLP &R, const DataLayout &DL,
LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
<< ") < "
<< "MinVF (" << MinVF << ")\n");
- Done = true;
return false;
}
@@ -24859,7 +24854,7 @@ bool StoreChainContext::updateCandidateVFs(const TargetTransformInfo &TTI) {
// Get the current VF
std::optional<unsigned> StoreChainContext::getCurrentVF() const {
- if (Done || CandidateVFs.empty())
+ if (CandidateVFs.empty())
return std::nullopt;
return CandidateVFs.front();
}
@@ -25038,7 +25033,9 @@ bool SLPVectorizerPass::vectorizeStores(
constexpr unsigned MaxAttempts = 4;
for (unsigned LimitVF = GlobalMaxVF; LimitVF > 0;
LimitVF = bit_ceil(LimitVF) / 2) {
- for (const auto &CtxPtr : AllContexts) {
+ for (auto &CtxPtr : AllContexts) {
+ if (!CtxPtr)
+ break;
StoreChainContext &Context = *CtxPtr;
for (std::optional<unsigned> VFUnval = Context.getCurrentVF();
VFUnval && *VFUnval >= LimitVF; VFUnval = Context.getCurrentVF()) {
@@ -25143,7 +25140,7 @@ bool SLPVectorizerPass::vectorizeStores(
if (!Context.getCurrentVF()) {
// All values vectorized - exit.
if (Context.allVectorized()) {
- Context.Done = true;
+ CtxPtr.reset();
break;
}
// Check if tried all attempts or no need for the last attempts at
@@ -25151,12 +25148,12 @@ bool SLPVectorizerPass::vectorizeStores(
if (Context.Repeat >= MaxAttempts ||
(Context.Repeat > 1 &&
(Context.RepeatChanged || !AnyProfitableGraph))) {
- Context.Done = true;
+ CtxPtr.reset();
break;
}
if (!Context.updateCandidateVFs(*TTI)) {
- Context.Done = true;
+ CtxPtr.reset();
break;
}
Context.updateRangeSizesFromCache();
>From 3099d36d628acab5caa3d0c87ddbf07616c23dab Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 16:19:03 -0700
Subject: [PATCH 14/16] Make checkTreeSizes part of StoreChainContext
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 03492a4e5c47e..476239cc3026e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24644,6 +24644,8 @@ class StoreChainContext {
// Record vectorization of the provided range
void markRangeVectorized(unsigned StartIdx, unsigned Length,
unsigned &FirstUnvecStore, unsigned &MaxSliceEnd);
+ bool checkTreeSizes(const unsigned SliceStartIdx,
+ const unsigned VF) const;
private:
bool isNotVectorized(const SizePair &P) const {
@@ -24858,11 +24860,11 @@ std::optional<unsigned> StoreChainContext::getCurrentVF() const {
return std::nullopt;
return CandidateVFs.front();
}
-} // namespace
/// Checks if the quadratic mean deviation is less than 90% of the mean size.
-static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes,
- const SmallVector<unsigned> &RangeSizesByIdx) {
+bool StoreChainContext::checkTreeSizes(const unsigned SliceStartIdx,
+ const unsigned VF) const {
+ auto Sizes = RangeSizes.slice(SliceStartIdx, VF);
unsigned Num = 0;
uint64_t Sum = std::accumulate(
Sizes.begin(), Sizes.end(), static_cast<uint64_t>(0),
@@ -24895,8 +24897,6 @@ static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes,
return Dev * 96 / (Mean * Mean) == 0;
}
-namespace {
-
/// A group of stores that we'll try to bundle together using vector ops.
/// They are ordered using the signed distance of their address operand to the
/// address of this group's BaseInstr.
@@ -25053,8 +25053,7 @@ bool SLPVectorizerPass::vectorizeStores(
FirstVecStore >= Context.End ? Context.End : FirstVecStore;
for (unsigned SliceStartIdx = FirstUnvecStore;
SliceStartIdx + VF <= MaxSliceEnd;) {
- if (!checkTreeSizes(Context.RangeSizes.slice(SliceStartIdx, VF),
- RangeSizesByIdx)) {
+ if (!Context.checkTreeSizes(SliceStartIdx, VF)) {
++SliceStartIdx;
continue;
}
>From e64daf224dba17366e82e06c43b456d026eb227e Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 16:27:30 -0700
Subject: [PATCH 15/16] Refactor to make RangeSizes private
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 476239cc3026e..6629a65dc0c4c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24605,7 +24605,6 @@ class StoreChainContext {
unsigned End = 0;
/// Stores that compose this chain
BoUpSLP::ValueList Operands;
- MutableArrayRef<SizePair> RangeSizes;
/// Store information about failed vectorization attempts due to scheduling
SmallDenseMap<Value *, SizePair> NonSchedulable;
@@ -24680,6 +24679,7 @@ class StoreChainContext {
/// vectorization of elements that belong to multiple chains
/// - second: contains cached TreeSize value for that element
SmallVector<SizePair> RangeSizesStorage;
+ MutableArrayRef<SizePair> RangeSizes;
/// RangeSize information for all elements in any chain
/// Needed since may be overlap between chains
SmallVector<unsigned> &RangeSizesByIdx;
@@ -25112,7 +25112,7 @@ bool SLPVectorizerPass::vectorizeStores(
Context.isFirstSizeSameRange(SliceStartIdx, VF, TreeSize)) {
SliceStartIdx += VF;
while (SliceStartIdx != MaxSliceEnd &&
- Context.RangeSizes[SliceStartIdx].first == TreeSize)
+ Context.isFirstSizeSameRange(SliceStartIdx, 1, TreeSize))
++SliceStartIdx;
continue;
}
>From ce4335984c1fd9a1bd4ecac556cb987c40660a61 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 16:56:30 -0700
Subject: [PATCH 16/16] Lint
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 6629a65dc0c4c..6d9f888a303f5 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24643,8 +24643,7 @@ class StoreChainContext {
// Record vectorization of the provided range
void markRangeVectorized(unsigned StartIdx, unsigned Length,
unsigned &FirstUnvecStore, unsigned &MaxSliceEnd);
- bool checkTreeSizes(const unsigned SliceStartIdx,
- const unsigned VF) const;
+ bool checkTreeSizes(const unsigned SliceStartIdx, const unsigned VF) const;
private:
bool isNotVectorized(const SizePair &P) const {
@@ -24699,8 +24698,8 @@ void StoreChainContext::markRangeVectorized(unsigned StartIdx, unsigned Length,
FirstUnvecStore = StartIdx + Length;
}
if (StartIdx + Length > MaxSliceEnd - MinVF) {
- for (SizePair &P : RangeSizes.slice(
- StartIdx + Length, MaxSliceEnd - (StartIdx + Length))) {
+ for (SizePair &P : RangeSizes.slice(StartIdx + Length,
+ MaxSliceEnd - (StartIdx + Length))) {
P.first = LocallyUnvectorizable;
P.second = 0;
}
More information about the llvm-commits
mailing list