[llvm] [SLP][NFC] Refactor to prepare for constant stride stores (PR #185997)

Ryan Buchner via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 17 16:59:44 PDT 2026


https://github.com/bababuck updated https://github.com/llvm/llvm-project/pull/185997

>From 6b21baf3fcea35a1e7fe817655d1cc58743a2a1b Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 9 Mar 2026 23:18:46 -0700
Subject: [PATCH 01/16] [SLP][NFC] Fix typo

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 97ba8eee6742c..5a7be54f122bb 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -21148,7 +21148,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
         Value *StrideVal;
         const StridedPtrInfo &SPtrInfo = TreeEntryToStridedPtrInfoMap.at(E);
         StridedLoadTy = SPtrInfo.Ty;
-        assert(StridedLoadTy && "Missing StridedPoinerInfo for tree entry.");
+        assert(StridedLoadTy && "Missing StridedPointerInfo for tree entry.");
         unsigned StridedLoadEC =
             StridedLoadTy->getElementCount().getKnownMinValue();
 

>From 688f4b23822ba2c17bd6fbd2c2525f2b84e3de2a Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 10 Mar 2026 15:55:31 -0700
Subject: [PATCH 02/16] [SLP][NFC] Fix off by one error in MaxTotalNum
 calculation

Potentially a slight performance improvement.
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5a7be54f122bb..0cab5d5c615eb 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24943,10 +24943,8 @@ bool SLPVectorizerPass::vectorizeStores(
         const unsigned MaxTotalNum = std::min<unsigned>(
             Operands.size(),
             static_cast<unsigned>(
-                End -
-                std::distance(RangeSizes.begin(),
-                              find_if(RangeSizes, IsNotVectorized)) +
-                1));
+                End - std::distance(RangeSizes.begin(),
+                                    find_if(RangeSizes, IsNotVectorized))));
         unsigned VF = bit_ceil(CandidateVFs.front()) * 2;
         if (VF > MaxTotalNum || VF >= StoresLimit)
           break;

>From 0a2c7e7af0a21be0071a69140ac13720a056c15e Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Fri, 27 Feb 2026 12:20:39 -0800
Subject: [PATCH 03/16] [SLP][NFC] Refactor vectorizeStores() to create all
 chains first, then vectorize by VF

Try vectorizing all chains with VF=MaxVF, then VF=MaxVF/2, etc. This will be
important once strided stores are supported, since chains will then overlap.
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 549 ++++++++++++------
 1 file changed, 361 insertions(+), 188 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0cab5d5c615eb..336e1bdebf76f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24570,13 +24570,281 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
   return false;
 }
 
+namespace {
+/// A group of related stores which we are in the process of vectorizing,
+/// a subset of which may already vectorized. Stores context information
+/// about the group as a whole as well as information about what VF's need
+/// to be attempted still.
+struct StoreChainContext {
+  using SizePair = std::pair<unsigned, unsigned>;
+  using SizePairArrayRef = MutableArrayRef<SizePair>;
+
+  const TargetTransformInfo &TTI;
+
+  /// For the StoreTy/Stride in the given group, what is the smallest VF
+  /// that can be used
+  unsigned MinVF = 0;
+  /// Maximum number of instructions that can be vectorized, either
+  /// constrained by register width or operands size.
+  unsigned MaxVF = 0;
+  /// The largest VF checked in the current Repeat
+  unsigned ProbeVF = 0;
+  /// MaxRegVF represents the number of instructions (scalar, or vector in
+  /// case of revec) that can be vectorized to naturally fit in a vector
+  /// register.
+  unsigned MaxRegVF = 0;
+  /// How many times has CandidateVFs been refilled, prevents excessive
+  /// attempts at vectorizing large VFs
+  unsigned Repeat = 1;
+  /// Did any vectorization occur for the current iteration over CandidateVFs
+  bool RepeatChanged = false;
+  /// Are we finished checking this StoreChainContext? Can be due to all VFs
+  /// being checked, or an early exit condition
+  bool Done = false;
+  /// What element index is the end of the to be vectorized Operands
+  /// i.e. Operands.size() == 16, and 12-15 were vectorized, then End == 12
+  unsigned End = 0;
+
+  /// Stores that compose this chain
+  BoUpSLP::ValueList Operands;
+  /// Which VFs do we want to attempt for this chain
+  std::queue<unsigned> CandidateVFs;
+  /// Track the TreeSizes of prior vectorization attempts using each element,
+  /// to help us find early exit cases
+  /// .first contains pointer into RangeSizesByIdx to help us track
+  /// vectorization of elements that belong to multiple chains
+  SmallVector<SizePair> RangeSizesStorage;
+  SizePairArrayRef RangeSizes;
+  /// Store information about failed vectorization attempts due to scheduling
+  DenseMap<Value *, SizePair> NonSchedulable;
+  /// Type of the Stores in `Operands`
+  Type *StoreTy = nullptr;
+
+  /// RangeSize information for all elements in any chain
+  /// Needed since may be overlap between chains
+  inline static SmallVector<unsigned> RangeSizesByIdx;
+  /// Element has not been vectorized, but due to the elements around it being
+  /// vectorized, it does not have enough neighboring elements to make a chain
+  /// longer than MinVF as part of the current Context
+  inline static const unsigned LocallyUnvectorizable =
+      std::numeric_limits<unsigned>::max();
+
+  explicit StoreChainContext(const TargetTransformInfo &TTI,
+                             ArrayRef<Value *> Ops,
+                             ArrayRef<SizePair> RangeSizes)
+      : TTI(TTI), Operands(Ops), RangeSizesStorage(RangeSizes) {}
+
+  static bool isNotVectorized(const SizePair &P) {
+    return P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] > 0;
+  }
+
+  static bool isVectorized(const SizePair &P) {
+    return P.first == LocallyUnvectorizable || RangeSizesByIdx[P.first] == 0;
+  }
+
+  static bool vfIsProfitable(unsigned Size, const SizePair &P) {
+    assert(P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] &&
+           "Cannot check profitability of vectorized element");
+    return Size >= RangeSizesByIdx[P.first];
+  }
+
+  static bool firstSizeSame(unsigned Size, const SizePair &P) {
+    assert(P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] &&
+           "Cannot check profitability of vectorized element");
+    return Size == RangeSizesByIdx[P.first];
+  }
+
+  // Return the index of the first unvectorized store after \p StartIdx
+  unsigned getFirstUnvecStore(unsigned StartIdx = 0) const {
+    return std::distance(
+        RangeSizes.begin(),
+        find_if(RangeSizes.drop_front(StartIdx), isNotVectorized));
+  }
+
+  // Return the index of the first vectorized store after \p StartIdx
+  unsigned getFirstVecStoreAfter(unsigned StartIdx) const {
+    return std::distance(
+        RangeSizes.begin(),
+        find_if(RangeSizes.drop_front(StartIdx), isVectorized));
+  }
+
+  // Return true if all stores have been vectorized
+  bool allVectorized() const { return all_of(RangeSizes, isVectorized); }
+
+  // Return true if all elements in the given range match \p TreeSize
+  bool isFirstSizeSameRange(unsigned StartIdx, unsigned Length,
+                            unsigned TreeSize) const {
+    return all_of(
+        RangeSizes.slice(StartIdx, Length),
+        [TreeSize](const SizePair &P) { return firstSizeSame(TreeSize, P); });
+  }
+
+  // Return true if the \p TreeSize is profitable for all elements in the range
+  bool allOfRangeProfitable(unsigned StartIdx, unsigned Length,
+                            unsigned TreeSize) const {
+    return all_of(
+        RangeSizes.slice(StartIdx, Length),
+        [TreeSize](const SizePair &P) { return vfIsProfitable(TreeSize, P); });
+  }
+
+  // Update the live (first) range sizes from the cached values (second)
+  void updateRangeSizesFromCache() {
+    for (SizePair &P : RangeSizes) {
+      if (P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] != 0)
+        RangeSizesByIdx[P.first] = std::max(P.second, RangeSizesByIdx[P.first]);
+    }
+  }
+
+  // Update the cached (second) range sizes with the given \p TreeSize
+  void updateCachedRangeSizes(unsigned StartIdx, unsigned Length,
+                              unsigned TreeSize) {
+    for (SizePair &P : RangeSizes.slice(StartIdx, Length))
+      P.second = std::max(P.second, TreeSize);
+  }
+
+  // Update CandidateVFs for secondary iterations
+  bool updateCandidateVFs() {
+    assert(CandidateVFs.empty() && "Did not use all VFs before refilling");
+    constexpr unsigned StoresLimit = 64;
+    const unsigned MaxTotalNum = std::min<unsigned>(
+        Operands.size(), static_cast<unsigned>(End - getFirstUnvecStore()));
+    unsigned VF = bit_ceil(ProbeVF) * 2;
+    if (VF > MaxTotalNum || VF >= StoresLimit)
+      return false;
+    // Attempt again to vectorize even larger chains if all previous
+    // attempts were unsuccessful because of the cost issues.
+    unsigned Limit =
+        getFloorFullVectorNumberOfElements(TTI, StoreTy, MaxTotalNum);
+    if (bit_floor(Limit) == VF && Limit != VF)
+      CandidateVFs.push(Limit);
+    CandidateVFs.push(VF);
+    ProbeVF = CandidateVFs.front();
+    ++Repeat;
+    RepeatChanged = false;
+    return true;
+  }
+
+  // Get the current VF
+  std::optional<unsigned> getCurrentVF() const {
+    if (Done || CandidateVFs.empty())
+      return std::nullopt;
+    return CandidateVFs.front();
+  }
+
+  // Increment the VF-Index counter, return false if at end of CandidateVFs
+  void incrementVF() { CandidateVFs.pop(); }
+
+  // Set up initial values using the already set Operands
+  bool initializeContext(BoUpSLP &R, const DataLayout &DL);
+
+  // Record vectorization of the provided range
+  void markRangeVectorized(unsigned StartIdx, unsigned Length,
+                           unsigned &FirstUnvecStore, unsigned &MaxSliceEnd);
+};
+
+void StoreChainContext::markRangeVectorized(unsigned StartIdx, unsigned Length,
+                                            unsigned &FirstUnvecStore,
+                                            unsigned &MaxSliceEnd) {
+  for (StoreChainContext::SizePair &P : RangeSizes.slice(StartIdx, Length))
+    RangeSizesByIdx[P.first] = P.second = 0;
+  if (StartIdx < FirstUnvecStore + MinVF) {
+    for (StoreChainContext::SizePair &P :
+         RangeSizes.slice(FirstUnvecStore, StartIdx - FirstUnvecStore)) {
+      P.first = LocallyUnvectorizable;
+      P.second = 0;
+    }
+    FirstUnvecStore = StartIdx + Length;
+  }
+  if (StartIdx + Length > MaxSliceEnd - MinVF) {
+    for (StoreChainContext::SizePair &P : RangeSizes.slice(
+             StartIdx + Length, MaxSliceEnd - (StartIdx + Length))) {
+      P.first = LocallyUnvectorizable;
+      P.second = 0;
+    }
+    if (MaxSliceEnd == End)
+      End = StartIdx;
+    MaxSliceEnd = StartIdx;
+  }
+}
+
+bool StoreChainContext::initializeContext(BoUpSLP &R, const DataLayout &DL) {
+  // Initialize range tracking in context.
+  RangeSizes = MutableArrayRef(RangeSizesStorage);
+
+  unsigned MaxVecRegSize = R.getMaxVecRegSize();
+  unsigned EltSize = R.getVectorElementSize(Operands[0]);
+  unsigned MaxElts = llvm::bit_floor(MaxVecRegSize / EltSize);
+
+  MaxVF = std::min(R.getMaximumVF(EltSize, Instruction::Store), MaxElts);
+  auto *Store = cast<StoreInst>(Operands[0]);
+  StoreTy = Store->getValueOperand()->getType();
+  Type *ValueTy = StoreTy;
+  if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
+    ValueTy = Trunc->getSrcTy();
+  // When REVEC is enabled, StoreTy and ValueTy may be FixedVectorType. But
+  // getStoreMinimumVF only support scalar type as arguments. As a result,
+  // we need to use the element type of StoreTy and ValueTy to retrieve the
+  // VF and then transform it back.
+  // Remember: VF is defined as the number we want to vectorize, not the
+  // number of elements in the final vector.
+  Type *StoreScalarTy = StoreTy->getScalarType();
+  MinVF = PowerOf2Ceil(TTI.getStoreMinimumVF(
+      R.getMinVF(DL.getTypeStoreSizeInBits(StoreScalarTy)), StoreScalarTy,
+      ValueTy->getScalarType()));
+  MinVF /= getNumElements(StoreTy);
+  MinVF = std::max<unsigned>(2, MinVF);
+
+  if (MaxVF < MinVF) {
+    LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
+                      << ") < "
+                      << "MinVF (" << MinVF << ")\n");
+    Done = true;
+    return false;
+  }
+
+  unsigned NonPowerOf2VF = 0;
+  if (VectorizeNonPowerOf2) {
+    // First try vectorizing with a non-power-of-2 VF. At the moment, only
+    // consider cases where VF + 1 is a power-of-2, i.e. almost all vector
+    // lanes are used.
+    unsigned CandVF = std::clamp<unsigned>(Operands.size(), MinVF, MaxVF);
+    if (has_single_bit(CandVF + 1)) {
+      NonPowerOf2VF = CandVF;
+      assert(NonPowerOf2VF != MaxVF &&
+             "Non-power-of-2 VF should not be equal to MaxVF");
+    }
+  }
+
+  MaxRegVF = MaxVF;
+
+  MaxVF = std::min<unsigned>(MaxVF, bit_floor(Operands.size()));
+  if (MaxVF < MinVF) {
+    LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
+                      << ") < "
+                      << "MinVF (" << MinVF << ")\n");
+    Done = true;
+    return false;
+  }
+
+  for (unsigned VF = std::max(MaxVF, NonPowerOf2VF); VF >= MinVF;
+       VF = divideCeil(VF, 2))
+    CandidateVFs.push(VF);
+
+  End = Operands.size();
+  ProbeVF = MaxVF;
+  return true;
+}
+} // namespace
+
 /// Checks if the quadratic mean deviation is less than 90% of the mean size.
 static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes) {
   unsigned Num = 0;
   uint64_t Sum = std::accumulate(
       Sizes.begin(), Sizes.end(), static_cast<uint64_t>(0),
       [&](uint64_t V, const std::pair<unsigned, unsigned> &Val) {
-        unsigned Size = Val.first;
+        unsigned Size = Val.first == StoreChainContext::LocallyUnvectorizable
+                            ? 0
+                            : StoreChainContext::RangeSizesByIdx[Val.first];
         if (Size == 1)
           return V;
         ++Num;
@@ -24590,7 +24858,10 @@ static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes) {
   uint64_t Dev = std::accumulate(
                      Sizes.begin(), Sizes.end(), static_cast<uint64_t>(0),
                      [&](uint64_t V, const std::pair<unsigned, unsigned> &Val) {
-                       unsigned P = Val.first;
+                       unsigned P =
+                           Val.first == StoreChainContext::LocallyUnvectorizable
+                               ? 0
+                               : StoreChainContext::RangeSizesByIdx[Val.first];
                        if (P == 1)
                          return V;
                        return V + (P - Mean) * (P - Mean);
@@ -24695,144 +24966,73 @@ bool SLPVectorizerPass::vectorizeStores(
 
   auto TryToVectorize = [&](const RelatedStoreInsts::DistToInstMap &StoreSeq) {
     int64_t PrevDist = -1;
+    unsigned GlobalMaxVF = 0;
+    StoreChainContext::RangeSizesByIdx.assign(StoreSeq.size(), 1);
+    SmallVector<std::unique_ptr<StoreChainContext>> AllContexts;
     BoUpSLP::ValueList Operands;
-    // Collect the chain into a list.
+    SmallVector<StoreChainContext::SizePair> RangeSizes;
     for (auto [Idx, Data] : enumerate(StoreSeq)) {
       auto &[Dist, InstIdx] = Data;
       if (Operands.empty() || Dist - PrevDist == 1) {
         Operands.push_back(Stores[InstIdx]);
+        RangeSizes.emplace_back(Idx, 1);
         PrevDist = Dist;
         if (Idx != StoreSeq.size() - 1)
           continue;
       }
-      llvm::scope_exit E([&, &Dist = Dist, &InstIdx = InstIdx]() {
-        Operands.clear();
+
+      if (Operands.size() > 1 ||
+          Visited
+              .insert({Operands.front(),
+                       cast<StoreInst>(Operands.front())->getValueOperand(),
+                       Operands.back(),
+                       cast<StoreInst>(Operands.back())->getValueOperand(),
+                       Operands.size()})
+              .second) {
+        AllContexts.emplace_back(
+            std::make_unique<StoreChainContext>(*TTI, Operands, RangeSizes));
+        if (!AllContexts.back()->initializeContext(R, *DL))
+          AllContexts.pop_back();
+        else
+          GlobalMaxVF = std::max(GlobalMaxVF, AllContexts.back()->MaxVF);
+      }
+      Operands.clear();
+      RangeSizes.clear();
+      if (Idx != StoreSeq.size() - 1) {
         Operands.push_back(Stores[InstIdx]);
+        RangeSizes.emplace_back(Idx, 1);
         PrevDist = Dist;
-      });
-
-      if (Operands.size() <= 1 ||
-          !Visited
-               .insert({Operands.front(),
-                        cast<StoreInst>(Operands.front())->getValueOperand(),
-                        Operands.back(),
-                        cast<StoreInst>(Operands.back())->getValueOperand(),
-                        Operands.size()})
-               .second)
-        continue;
-
-      unsigned MaxVecRegSize = R.getMaxVecRegSize();
-      unsigned EltSize = R.getVectorElementSize(Operands[0]);
-      unsigned MaxElts = llvm::bit_floor(MaxVecRegSize / EltSize);
-
-      unsigned MaxVF =
-          std::min(R.getMaximumVF(EltSize, Instruction::Store), MaxElts);
-      auto *Store = cast<StoreInst>(Operands[0]);
-      Type *StoreTy = Store->getValueOperand()->getType();
-      Type *ValueTy = StoreTy;
-      if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
-        ValueTy = Trunc->getSrcTy();
-      // When REVEC is enabled, StoreTy and ValueTy may be FixedVectorType. But
-      // getStoreMinimumVF only support scalar type as arguments. As a result,
-      // we need to use the element type of StoreTy and ValueTy to retrieve the
-      // VF and then transform it back.
-      // Remember: VF is defined as the number we want to vectorize, not the
-      // number of elements in the final vector.
-      Type *StoreScalarTy = StoreTy->getScalarType();
-      unsigned MinVF = PowerOf2Ceil(TTI->getStoreMinimumVF(
-          R.getMinVF(DL->getTypeStoreSizeInBits(StoreScalarTy)), StoreScalarTy,
-          ValueTy->getScalarType()));
-      MinVF /= getNumElements(StoreTy);
-      MinVF = std::max<unsigned>(2, MinVF);
-
-      if (MaxVF < MinVF) {
-        LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
-                          << ") < "
-                          << "MinVF (" << MinVF << ")\n");
-        continue;
       }
+    }
 
-      unsigned NonPowerOf2VF = 0;
-      if (VectorizeNonPowerOf2) {
-        // First try vectorizing with a non-power-of-2 VF. At the moment, only
-        // consider cases where VF + 1 is a power-of-2, i.e. almost all vector
-        // lanes are used.
-        unsigned CandVF = std::clamp<unsigned>(Operands.size(), MinVF, MaxVF);
-        if (has_single_bit(CandVF + 1)) {
-          NonPowerOf2VF = CandVF;
-          assert(NonPowerOf2VF != MaxVF &&
-                 "Non-power-of-2 VF should not be equal to MaxVF");
-        }
-      }
+    constexpr unsigned MaxAttempts = 4;
+    for (unsigned LimitVF = GlobalMaxVF; LimitVF > 0;
+         LimitVF = bit_ceil(LimitVF) / 2) {
+      for (const auto &CtxPtr : AllContexts) {
+        StoreChainContext &Context = *CtxPtr;
+        for (std::optional<unsigned> VFUnval = Context.getCurrentVF();
+             VFUnval && *VFUnval >= LimitVF; VFUnval = Context.getCurrentVF()) {
+          unsigned VF = *VFUnval;
 
-      // MaxRegVF represents the number of instructions (scalar, or vector in
-      // case of revec) that can be vectorized to naturally fit in a vector
-      // register.
-      unsigned MaxRegVF = MaxVF;
-
-      MaxVF = std::min<unsigned>(MaxVF, bit_floor(Operands.size()));
-      if (MaxVF < MinVF) {
-        LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
-                          << ") < "
-                          << "MinVF (" << MinVF << ")\n");
-        continue;
-      }
-
-      SmallVector<unsigned> CandidateVFs;
-      for (unsigned VF = std::max(MaxVF, NonPowerOf2VF); VF >= MinVF;
-           VF = divideCeil(VF, 2))
-        CandidateVFs.push_back(VF);
-
-      unsigned End = Operands.size();
-      unsigned Repeat = 0;
-      constexpr unsigned MaxAttempts = 4;
-      // first: the best TreeSize from all prior loops over CandidateVFs, gets
-      // updated after looping through CandidateVFs
-      // second: the best TreeSize from all prior loops including the current
-      // one
-      llvm::SmallVector<std::pair<unsigned, unsigned>> RangeSizesStorage(
-          Operands.size(), {1, 1});
-      // The `slice` and `drop_front` interfaces are convenient
-      const auto RangeSizes = MutableArrayRef(RangeSizesStorage);
-      DenseMap<Value *, std::pair<unsigned, unsigned>> NonSchedulable;
-      auto IsNotVectorized = [](const std::pair<unsigned, unsigned> &P) {
-        return P.first > 0;
-      };
-      auto IsVectorized = [](const std::pair<unsigned, unsigned> &P) {
-        return P.first == 0;
-      };
-      auto VFIsProfitable = [](unsigned Size,
-                               const std::pair<unsigned, unsigned> &P) {
-        return Size >= P.first;
-      };
-      auto FirstSizeSame = [](unsigned Size,
-                              const std::pair<unsigned, unsigned> &P) {
-        return Size == P.first;
-      };
-      while (true) {
-        ++Repeat;
-        bool RepeatChanged = false;
-        bool AnyProfitableGraph = false;
-        for (unsigned VF : CandidateVFs) {
-          AnyProfitableGraph = false;
-          unsigned FirstUnvecStore = std::distance(
-              RangeSizes.begin(), find_if(RangeSizes, IsNotVectorized));
+          bool AnyProfitableGraph = false;
+          unsigned FirstUnvecStore = Context.getFirstUnvecStore();
 
           // Form slices of size VF starting from FirstUnvecStore and try to
           // vectorize them.
-          while (FirstUnvecStore < End) {
-            unsigned FirstVecStore = std::distance(
-                RangeSizes.begin(),
-                find_if(RangeSizes.drop_front(FirstUnvecStore), IsVectorized));
-            unsigned MaxSliceEnd = FirstVecStore >= End ? End : FirstVecStore;
+          while (FirstUnvecStore < Context.End) {
+            unsigned FirstVecStore =
+                Context.getFirstVecStoreAfter(FirstUnvecStore);
+            unsigned MaxSliceEnd =
+                FirstVecStore >= Context.End ? Context.End : FirstVecStore;
             for (unsigned SliceStartIdx = FirstUnvecStore;
                  SliceStartIdx + VF <= MaxSliceEnd;) {
-              if (!checkTreeSizes(RangeSizes.slice(SliceStartIdx, VF))) {
+              if (!checkTreeSizes(
+                      Context.RangeSizes.slice(SliceStartIdx, VF))) {
                 ++SliceStartIdx;
                 continue;
               }
               ArrayRef<Value *> Slice =
-                  ArrayRef(Operands).slice(SliceStartIdx, VF);
+                  ArrayRef(Context.Operands).slice(SliceStartIdx, VF);
               assert(all_of(Slice,
                             [&](Value *V) {
                               return cast<StoreInst>(V)
@@ -24843,9 +25043,9 @@ bool SLPVectorizerPass::vectorizeStores(
                                          ->getType();
                             }) &&
                      "Expected all operands of same type.");
-              if (!NonSchedulable.empty()) {
+              if (!Context.NonSchedulable.empty()) {
                 auto [NonSchedSizeMax, NonSchedSizeMin] =
-                    NonSchedulable.lookup(Slice.front());
+                    Context.NonSchedulable.lookup(Slice.front());
                 if (NonSchedSizeMax > 0 && NonSchedSizeMin <= VF) {
                   // VF is too ambitious. Try to vectorize another slice before
                   // trying a smaller VF.
@@ -24854,12 +25054,12 @@ bool SLPVectorizerPass::vectorizeStores(
                 }
               }
               unsigned TreeSize;
-              std::optional<bool> Res =
-                  vectorizeStoreChain(Slice, R, SliceStartIdx, MinVF, TreeSize);
+              std::optional<bool> Res = vectorizeStoreChain(
+                  Slice, R, SliceStartIdx, Context.MinVF, TreeSize);
               if (!Res) {
                 // Update the range of non schedulable VFs for slices starting
                 // at SliceStartIdx.
-                NonSchedulable
+                Context.NonSchedulable
                     .try_emplace(Slice.front(), std::make_pair(VF, VF))
                     .first->getSecond()
                     .second = VF;
@@ -24867,101 +25067,74 @@ bool SLPVectorizerPass::vectorizeStores(
                 // Mark the vectorized stores so that we don't vectorize them
                 // again.
                 VectorizedStores.insert_range(Slice);
-                AnyProfitableGraph = RepeatChanged = Changed = true;
+                AnyProfitableGraph = Context.RepeatChanged = Changed = true;
                 // If we vectorized initial block, no need to try to vectorize
                 // it again.
-                for (std::pair<unsigned, unsigned> &P :
-                     RangeSizes.slice(SliceStartIdx, VF))
-                  P.first = P.second = 0;
-                if (SliceStartIdx < FirstUnvecStore + MinVF) {
-                  for (std::pair<unsigned, unsigned> &P : RangeSizes.slice(
-                           FirstUnvecStore, SliceStartIdx - FirstUnvecStore))
-                    P.first = P.second = 0;
-                  FirstUnvecStore = SliceStartIdx + VF;
-                }
-                if (SliceStartIdx > MaxSliceEnd - VF - MinVF) {
-                  for (std::pair<unsigned, unsigned> &P :
-                       RangeSizes.slice(SliceStartIdx + VF,
-                                        MaxSliceEnd - (SliceStartIdx + VF)))
-                    P.first = P.second = 0;
-                  if (MaxSliceEnd == End)
-                    End = SliceStartIdx;
-                  MaxSliceEnd = SliceStartIdx;
-                }
+                Context.markRangeVectorized(SliceStartIdx, VF, FirstUnvecStore,
+                                            MaxSliceEnd);
                 SliceStartIdx += VF;
                 continue;
               }
               if (VF > 2 && Res &&
-                  !all_of(RangeSizes.slice(SliceStartIdx, VF),
-                          std::bind(VFIsProfitable, TreeSize, _1))) {
+                  !Context.allOfRangeProfitable(SliceStartIdx, VF, TreeSize)) {
                 SliceStartIdx += VF;
                 continue;
               }
               // Check for the very big VFs that we're not rebuilding same
               // trees, just with larger number of elements.
-              if (VF > MaxRegVF && TreeSize > 1 &&
-                  all_of(RangeSizes.slice(SliceStartIdx, VF),
-                         std::bind(FirstSizeSame, TreeSize, _1))) {
+              if (VF > Context.MaxRegVF && TreeSize > 1 &&
+                  Context.isFirstSizeSameRange(SliceStartIdx, VF, TreeSize)) {
                 SliceStartIdx += VF;
                 while (SliceStartIdx != MaxSliceEnd &&
-                       RangeSizes[SliceStartIdx].first == TreeSize)
+                       Context.RangeSizes[SliceStartIdx].first == TreeSize)
                   ++SliceStartIdx;
                 continue;
               }
               if (TreeSize > 1)
-                for (std::pair<unsigned, unsigned> &P :
-                     RangeSizes.slice(SliceStartIdx, VF))
-                  P.second = std::max(P.second, TreeSize);
+                Context.updateCachedRangeSizes(SliceStartIdx, VF, TreeSize);
               ++SliceStartIdx;
               AnyProfitableGraph = true;
             }
-            if (FirstUnvecStore >= End)
+            if (FirstUnvecStore >= Context.End)
               break;
             if (MaxSliceEnd - FirstUnvecStore < VF &&
-                MaxSliceEnd - FirstUnvecStore >= MinVF)
+                MaxSliceEnd - FirstUnvecStore >= Context.MinVF)
               AnyProfitableGraph = true;
-            FirstUnvecStore = std::distance(
-                RangeSizes.begin(),
-                find_if(RangeSizes.drop_front(MaxSliceEnd), IsNotVectorized));
+            FirstUnvecStore = Context.getFirstUnvecStore(MaxSliceEnd);
           }
-          if (!AnyProfitableGraph && VF >= MaxRegVF && has_single_bit(VF))
+          if (!AnyProfitableGraph && VF >= Context.MaxRegVF &&
+              has_single_bit(VF))
             break;
           // For the MaxRegVF case, save RangeSizes to limit compile time
-          if (VF == MaxRegVF)
-            for (std::pair<unsigned, unsigned> &P : RangeSizes)
-              if (P.first != 0)
-                P.first = std::max(P.second, P.first);
-        }
-        // All values vectorized - exit.
-        if (all_of(RangeSizes, IsVectorized))
-          break;
-        // Check if tried all attempts or no need for the last attempts at all.
-        if (Repeat >= MaxAttempts ||
-            (Repeat > 1 && (RepeatChanged || !AnyProfitableGraph)))
-          break;
-        constexpr unsigned StoresLimit = 64;
-        const unsigned MaxTotalNum = std::min<unsigned>(
-            Operands.size(),
-            static_cast<unsigned>(
-                End - std::distance(RangeSizes.begin(),
-                                    find_if(RangeSizes, IsNotVectorized))));
-        unsigned VF = bit_ceil(CandidateVFs.front()) * 2;
-        if (VF > MaxTotalNum || VF >= StoresLimit)
-          break;
-        for (std::pair<unsigned, unsigned> &P : RangeSizes) {
-          if (P.first != 0)
-            P.first = std::max(P.second, P.first);
+          if (VF == Context.MaxRegVF)
+            Context.updateRangeSizesFromCache();
+
+          Context.incrementVF();
+          if (!Context.getCurrentVF()) {
+            // All values vectorized - exit.
+            if (Context.allVectorized()) {
+              Context.Done = true;
+              break;
+            }
+            // Check if tried all attempts or no need for the last attempts at
+            // all.
+            if (Context.Repeat >= MaxAttempts ||
+                (Context.Repeat > 1 &&
+                 (Context.RepeatChanged || !AnyProfitableGraph))) {
+              Context.Done = true;
+              break;
+            }
+
+            if (!Context.updateCandidateVFs()) {
+              Context.Done = true;
+              break;
+            }
+            Context.updateRangeSizesFromCache();
+          }
         }
-        // Attempt again to vectorize even larger chains if all previous
-        // attempts were unsuccessful because of the cost issues.
-        CandidateVFs.clear();
-        unsigned Limit =
-            getFloorFullVectorNumberOfElements(*TTI, StoreTy, MaxTotalNum);
-        if (bit_floor(Limit) == VF && Limit != VF)
-          CandidateVFs.push_back(Limit);
-        CandidateVFs.push_back(VF);
       }
     }
+    StoreChainContext::RangeSizesByIdx.clear();
   };
 
   /// Groups of stores to vectorize

>From a51e8f9772f7b07070a694c5150dad28647e6aac Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 11:54:48 -0700
Subject: [PATCH 04/16] Fix comment typos

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 336e1bdebf76f..7f5c3da09ecd0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24572,8 +24572,8 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
 
 namespace {
 /// A group of related stores which we are in the process of vectorizing,
-/// a subset of which may already vectorized. Stores context information
-/// about the group as a whole as well as information about what VF's need
+/// a subset of which may already be vectorized. Stores context information
+/// about the group as a whole as well as information about what VFs need
 /// to be attempted still.
 struct StoreChainContext {
   using SizePair = std::pair<unsigned, unsigned>;

>From 1dd64f5ad19ef988916b69fb39b41e1d667f68be Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 11:58:59 -0700
Subject: [PATCH 05/16] Make a RangeSizesByIdx array for each TryToVectorize()
 call

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 60 +++++++++++--------
 1 file changed, 35 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7f5c3da09ecd0..6bc47210e6c0a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24622,7 +24622,7 @@ struct StoreChainContext {
 
   /// RangeSize information for all elements in any chain
   /// Needed since may be overlap between chains
-  inline static SmallVector<unsigned> RangeSizesByIdx;
+  SmallVector<unsigned> &RangeSizesByIdx;
   /// Element has not been vectorized, but due to the elements around it being
   /// vectorized, it does not have enough neighboring elements to make a chain
   /// longer than MinVF as part of the current Context
@@ -24631,24 +24631,26 @@ struct StoreChainContext {
 
   explicit StoreChainContext(const TargetTransformInfo &TTI,
                              ArrayRef<Value *> Ops,
-                             ArrayRef<SizePair> RangeSizes)
-      : TTI(TTI), Operands(Ops), RangeSizesStorage(RangeSizes) {}
+                             ArrayRef<SizePair> RangeSizes,
+                             SmallVector<unsigned> &RangeSizesByIdx)
+      : TTI(TTI), Operands(Ops), RangeSizesStorage(RangeSizes),
+        RangeSizesByIdx(RangeSizesByIdx) {}
 
-  static bool isNotVectorized(const SizePair &P) {
+  bool isNotVectorized(const SizePair &P) const {
     return P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] > 0;
   }
 
-  static bool isVectorized(const SizePair &P) {
+  bool isVectorized(const SizePair &P) const {
     return P.first == LocallyUnvectorizable || RangeSizesByIdx[P.first] == 0;
   }
 
-  static bool vfIsProfitable(unsigned Size, const SizePair &P) {
+  bool vfIsProfitable(unsigned Size, const SizePair &P) const {
     assert(P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] &&
            "Cannot check profitability of vectorized element");
     return Size >= RangeSizesByIdx[P.first];
   }
 
-  static bool firstSizeSame(unsigned Size, const SizePair &P) {
+  bool firstSizeSame(unsigned Size, const SizePair &P) const {
     assert(P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] &&
            "Cannot check profitability of vectorized element");
     return Size == RangeSizesByIdx[P.first];
@@ -24658,33 +24660,41 @@ struct StoreChainContext {
   unsigned getFirstUnvecStore(unsigned StartIdx = 0) const {
     return std::distance(
         RangeSizes.begin(),
-        find_if(RangeSizes.drop_front(StartIdx), isNotVectorized));
+        find_if(RangeSizes.drop_front(StartIdx), [this](const SizePair &P) {
+          return this->isNotVectorized(P);
+        }));
   }
 
   // Return the index of the first vectorized store after \p StartIdx
   unsigned getFirstVecStoreAfter(unsigned StartIdx) const {
     return std::distance(
         RangeSizes.begin(),
-        find_if(RangeSizes.drop_front(StartIdx), isVectorized));
+        find_if(RangeSizes.drop_front(StartIdx),
+                [this](const SizePair &P) { return this->isVectorized(P); }));
   }
 
   // Return true if all stores have been vectorized
-  bool allVectorized() const { return all_of(RangeSizes, isVectorized); }
+  bool allVectorized() const {
+    return all_of(RangeSizes,
+                  [this](const SizePair &P) { return this->isVectorized(P); });
+  }
 
   // Return true if all elements in the given range match \p TreeSize
   bool isFirstSizeSameRange(unsigned StartIdx, unsigned Length,
                             unsigned TreeSize) const {
-    return all_of(
-        RangeSizes.slice(StartIdx, Length),
-        [TreeSize](const SizePair &P) { return firstSizeSame(TreeSize, P); });
+    return all_of(RangeSizes.slice(StartIdx, Length),
+                  [TreeSize, this](const SizePair &P) {
+                    return firstSizeSame(TreeSize, P);
+                  });
   }
 
   // Return true if the \p TreeSize is profitable for all elements in the range
   bool allOfRangeProfitable(unsigned StartIdx, unsigned Length,
                             unsigned TreeSize) const {
-    return all_of(
-        RangeSizes.slice(StartIdx, Length),
-        [TreeSize](const SizePair &P) { return vfIsProfitable(TreeSize, P); });
+    return all_of(RangeSizes.slice(StartIdx, Length),
+                  [TreeSize, this](const SizePair &P) {
+                    return vfIsProfitable(TreeSize, P);
+                  });
   }
 
   // Update the live (first) range sizes from the cached values (second)
@@ -24837,14 +24847,15 @@ bool StoreChainContext::initializeContext(BoUpSLP &R, const DataLayout &DL) {
 } // namespace
 
 /// Checks if the quadratic mean deviation is less than 90% of the mean size.
-static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes) {
+static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes,
+                           const SmallVector<unsigned> &RangeSizesByIdx) {
   unsigned Num = 0;
   uint64_t Sum = std::accumulate(
       Sizes.begin(), Sizes.end(), static_cast<uint64_t>(0),
       [&](uint64_t V, const std::pair<unsigned, unsigned> &Val) {
         unsigned Size = Val.first == StoreChainContext::LocallyUnvectorizable
                             ? 0
-                            : StoreChainContext::RangeSizesByIdx[Val.first];
+                            : RangeSizesByIdx[Val.first];
         if (Size == 1)
           return V;
         ++Num;
@@ -24861,7 +24872,7 @@ static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes) {
                        unsigned P =
                            Val.first == StoreChainContext::LocallyUnvectorizable
                                ? 0
-                               : StoreChainContext::RangeSizesByIdx[Val.first];
+                               : RangeSizesByIdx[Val.first];
                        if (P == 1)
                          return V;
                        return V + (P - Mean) * (P - Mean);
@@ -24967,7 +24978,7 @@ bool SLPVectorizerPass::vectorizeStores(
   auto TryToVectorize = [&](const RelatedStoreInsts::DistToInstMap &StoreSeq) {
     int64_t PrevDist = -1;
     unsigned GlobalMaxVF = 0;
-    StoreChainContext::RangeSizesByIdx.assign(StoreSeq.size(), 1);
+    SmallVector<unsigned> RangeSizesByIdx(StoreSeq.size(), 1);
     SmallVector<std::unique_ptr<StoreChainContext>> AllContexts;
     BoUpSLP::ValueList Operands;
     SmallVector<StoreChainContext::SizePair> RangeSizes;
@@ -24989,8 +25000,8 @@ bool SLPVectorizerPass::vectorizeStores(
                        cast<StoreInst>(Operands.back())->getValueOperand(),
                        Operands.size()})
               .second) {
-        AllContexts.emplace_back(
-            std::make_unique<StoreChainContext>(*TTI, Operands, RangeSizes));
+        AllContexts.emplace_back(std::make_unique<StoreChainContext>(
+            *TTI, Operands, RangeSizes, RangeSizesByIdx));
         if (!AllContexts.back()->initializeContext(R, *DL))
           AllContexts.pop_back();
         else
@@ -25026,8 +25037,8 @@ bool SLPVectorizerPass::vectorizeStores(
                 FirstVecStore >= Context.End ? Context.End : FirstVecStore;
             for (unsigned SliceStartIdx = FirstUnvecStore;
                  SliceStartIdx + VF <= MaxSliceEnd;) {
-              if (!checkTreeSizes(
-                      Context.RangeSizes.slice(SliceStartIdx, VF))) {
+              if (!checkTreeSizes(Context.RangeSizes.slice(SliceStartIdx, VF),
+                                  RangeSizesByIdx)) {
                 ++SliceStartIdx;
                 continue;
               }
@@ -25134,7 +25145,6 @@ bool SLPVectorizerPass::vectorizeStores(
         }
       }
     }
-    StoreChainContext::RangeSizesByIdx.clear();
   };
 
   /// Groups of stores to vectorize

>From b57477155400f2d2aadf8de521683c49d7a47fdd Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 12:08:14 -0700
Subject: [PATCH 06/16] Fix logic error

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 6bc47210e6c0a..1760c0785dda8 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24992,7 +24992,7 @@ bool SLPVectorizerPass::vectorizeStores(
           continue;
       }
 
-      if (Operands.size() > 1 ||
+      if (Operands.size() > 1 &&
           Visited
               .insert({Operands.front(),
                        cast<StoreInst>(Operands.front())->getValueOperand(),

>From 4c1154ba1b241e28ee492e75f3b1910a63ed61e9 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 12:44:55 -0700
Subject: [PATCH 07/16] Pass around TTI rather than storing it as part of
 StoreChainContext

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 21 +++++++++----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1760c0785dda8..39994b63ce595 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24579,8 +24579,6 @@ struct StoreChainContext {
   using SizePair = std::pair<unsigned, unsigned>;
   using SizePairArrayRef = MutableArrayRef<SizePair>;
 
-  const TargetTransformInfo &TTI;
-
   /// For the StoreTy/Stride in the given group, what is the smallest VF
   /// that can be used
   unsigned MinVF = 0;
@@ -24629,11 +24627,10 @@ struct StoreChainContext {
   inline static const unsigned LocallyUnvectorizable =
       std::numeric_limits<unsigned>::max();
 
-  explicit StoreChainContext(const TargetTransformInfo &TTI,
-                             ArrayRef<Value *> Ops,
+  explicit StoreChainContext(ArrayRef<Value *> Ops,
                              ArrayRef<SizePair> RangeSizes,
                              SmallVector<unsigned> &RangeSizesByIdx)
-      : TTI(TTI), Operands(Ops), RangeSizesStorage(RangeSizes),
+      : Operands(Ops), RangeSizesStorage(RangeSizes),
         RangeSizesByIdx(RangeSizesByIdx) {}
 
   bool isNotVectorized(const SizePair &P) const {
@@ -24713,7 +24710,7 @@ struct StoreChainContext {
   }
 
   // Update CandidateVFs for secondary iterations
-  bool updateCandidateVFs() {
+  bool updateCandidateVFs(const TargetTransformInfo &TTI) {
     assert(CandidateVFs.empty() && "Did not use all VFs before refilling");
     constexpr unsigned StoresLimit = 64;
     const unsigned MaxTotalNum = std::min<unsigned>(
@@ -24745,7 +24742,8 @@ struct StoreChainContext {
   void incrementVF() { CandidateVFs.pop(); }
 
   // Set up initial values using the already set Operands
-  bool initializeContext(BoUpSLP &R, const DataLayout &DL);
+  bool initializeContext(BoUpSLP &R, const DataLayout &DL,
+                         const TargetTransformInfo &TTI);
 
   // Record vectorization of the provided range
   void markRangeVectorized(unsigned StartIdx, unsigned Length,
@@ -24777,7 +24775,8 @@ void StoreChainContext::markRangeVectorized(unsigned StartIdx, unsigned Length,
   }
 }
 
-bool StoreChainContext::initializeContext(BoUpSLP &R, const DataLayout &DL) {
+bool StoreChainContext::initializeContext(BoUpSLP &R, const DataLayout &DL,
+                                          const TargetTransformInfo &TTI) {
   // Initialize range tracking in context.
   RangeSizes = MutableArrayRef(RangeSizesStorage);
 
@@ -25001,8 +25000,8 @@ bool SLPVectorizerPass::vectorizeStores(
                        Operands.size()})
               .second) {
         AllContexts.emplace_back(std::make_unique<StoreChainContext>(
-            *TTI, Operands, RangeSizes, RangeSizesByIdx));
-        if (!AllContexts.back()->initializeContext(R, *DL))
+            Operands, RangeSizes, RangeSizesByIdx));
+        if (!AllContexts.back()->initializeContext(R, *DL, *TTI))
           AllContexts.pop_back();
         else
           GlobalMaxVF = std::max(GlobalMaxVF, AllContexts.back()->MaxVF);
@@ -25136,7 +25135,7 @@ bool SLPVectorizerPass::vectorizeStores(
               break;
             }
 
-            if (!Context.updateCandidateVFs()) {
+            if (!Context.updateCandidateVFs(*TTI)) {
               Context.Done = true;
               break;
             }

>From ff033b0be6ace8912c330885a25637309d08983e Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 12:46:02 -0700
Subject: [PATCH 08/16] Fix comment

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 39994b63ce595..dca1985dd5534 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24609,8 +24609,9 @@ struct StoreChainContext {
   std::queue<unsigned> CandidateVFs;
   /// Track the TreeSizes of prior vectorization attempts using each element,
   /// to help us find early exit cases
-  /// .first contains pointer into RangeSizesByIdx to help us track
+  /// - first: contains pointer into RangeSizesByIdx to help us track
   /// vectorization of elements that belong to multiple chains
+  /// - second: contains cached TreeSize value for that element
   SmallVector<SizePair> RangeSizesStorage;
   SizePairArrayRef RangeSizes;
   /// Store information about failed vectorization attempts due to scheduling

>From a0d84fb2b873155c6b5b31d71e9a89be74d69870 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 12:58:34 -0700
Subject: [PATCH 09/16] [SLP] Update DenseMap to be of type SmallDenseMap

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index dca1985dd5534..b07833f331596 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24615,7 +24615,7 @@ struct StoreChainContext {
   SmallVector<SizePair> RangeSizesStorage;
   SizePairArrayRef RangeSizes;
   /// Store information about failed vectorization attempts due to scheduling
-  DenseMap<Value *, SizePair> NonSchedulable;
+  SmallDenseMap<Value *, SizePair> NonSchedulable;
   /// Type of the Stores in `Operands`
   Type *StoreTy = nullptr;
 

>From 7d513d3227d28e8102a5f767b1a10faf53c7aa50 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 14:06:44 -0700
Subject: [PATCH 10/16] [SLP] Remove excessive 'using' statement

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b07833f331596..16864171dc48a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24577,7 +24577,6 @@ namespace {
 /// to be attempted still.
 struct StoreChainContext {
   using SizePair = std::pair<unsigned, unsigned>;
-  using SizePairArrayRef = MutableArrayRef<SizePair>;
 
   /// For the StoreTy/Stride in the given group, what is the smallest VF
   /// that can be used
@@ -24613,7 +24612,7 @@ struct StoreChainContext {
   /// vectorization of elements that belong to multiple chains
   /// - second: contains cached TreeSize value for that element
   SmallVector<SizePair> RangeSizesStorage;
-  SizePairArrayRef RangeSizes;
+  MutableArrayRef<SizePair> RangeSizes;
   /// Store information about failed vectorization attempts due to scheduling
   SmallDenseMap<Value *, SizePair> NonSchedulable;
   /// Type of the Stores in `Operands`

>From 3bb29e37907c9d1628485d3b875e4a8f27f16942 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 14:48:44 -0700
Subject: [PATCH 11/16] Remove excessive qualifications

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 16864171dc48a..3fc0b15550577 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24753,10 +24753,10 @@ struct StoreChainContext {
 void StoreChainContext::markRangeVectorized(unsigned StartIdx, unsigned Length,
                                             unsigned &FirstUnvecStore,
                                             unsigned &MaxSliceEnd) {
-  for (StoreChainContext::SizePair &P : RangeSizes.slice(StartIdx, Length))
+  for (SizePair &P : RangeSizes.slice(StartIdx, Length))
     RangeSizesByIdx[P.first] = P.second = 0;
   if (StartIdx < FirstUnvecStore + MinVF) {
-    for (StoreChainContext::SizePair &P :
+    for (SizePair &P :
          RangeSizes.slice(FirstUnvecStore, StartIdx - FirstUnvecStore)) {
       P.first = LocallyUnvectorizable;
       P.second = 0;
@@ -24764,7 +24764,7 @@ void StoreChainContext::markRangeVectorized(unsigned StartIdx, unsigned Length,
     FirstUnvecStore = StartIdx + Length;
   }
   if (StartIdx + Length > MaxSliceEnd - MinVF) {
-    for (StoreChainContext::SizePair &P : RangeSizes.slice(
+    for (SizePair &P : RangeSizes.slice(
              StartIdx + Length, MaxSliceEnd - (StartIdx + Length))) {
       P.first = LocallyUnvectorizable;
       P.second = 0;

>From a93809507fbfb7ef120888d0942ca502d9d4d0a4 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 15:40:05 -0700
Subject: [PATCH 12/16] Reorganize to make StoreChainContext a class

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 256 ++++++++++--------
 1 file changed, 138 insertions(+), 118 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3fc0b15550577..0f0b63ee2d0db 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24575,17 +24575,22 @@ namespace {
 /// a subset of which may already be vectorized. Stores context information
 /// about the group as a whole as well as information about what VFs need
 /// to be attempted still.
-struct StoreChainContext {
+class StoreChainContext {
+public:
   using SizePair = std::pair<unsigned, unsigned>;
 
+  /// In RangeSizes, element has not been vectorized, but due to the elements
+  /// around it being vectorized, it does not have enough neighboring elements
+  /// to make a chain longer than MinVF as part of the current Context
+  inline static const unsigned LocallyUnvectorizable =
+      std::numeric_limits<unsigned>::max();
+
   /// For the StoreTy/Stride in the given group, what is the smallest VF
   /// that can be used
   unsigned MinVF = 0;
   /// Maximum number of instructions that can be vectorized, either
   /// constrained by register width or operands size.
   unsigned MaxVF = 0;
-  /// The largest VF checked in the current Repeat
-  unsigned ProbeVF = 0;
   /// MaxRegVF represents the number of instructions (scalar, or vector in
   /// case of revec) that can be vectorized to naturally fit in a vector
   /// register.
@@ -24601,31 +24606,11 @@ struct StoreChainContext {
   /// What element index is the end of the to be vectorized Operands
   /// i.e. Operands.size() == 16, and 12-15 were vectorized, then End == 12
   unsigned End = 0;
-
   /// Stores that compose this chain
   BoUpSLP::ValueList Operands;
-  /// Which VFs do we want to attempt for this chain
-  std::queue<unsigned> CandidateVFs;
-  /// Track the TreeSizes of prior vectorization attempts using each element,
-  /// to help us find early exit cases
-  /// - first: contains pointer into RangeSizesByIdx to help us track
-  /// vectorization of elements that belong to multiple chains
-  /// - second: contains cached TreeSize value for that element
-  SmallVector<SizePair> RangeSizesStorage;
   MutableArrayRef<SizePair> RangeSizes;
   /// Store information about failed vectorization attempts due to scheduling
   SmallDenseMap<Value *, SizePair> NonSchedulable;
-  /// Type of the Stores in `Operands`
-  Type *StoreTy = nullptr;
-
-  /// RangeSize information for all elements in any chain
-  /// Needed since may be overlap between chains
-  SmallVector<unsigned> &RangeSizesByIdx;
-  /// Element has not been vectorized, but due to the elements around it being
-  /// vectorized, it does not have enough neighboring elements to make a chain
-  /// longer than MinVF as part of the current Context
-  inline static const unsigned LocallyUnvectorizable =
-      std::numeric_limits<unsigned>::max();
 
   explicit StoreChainContext(ArrayRef<Value *> Ops,
                              ArrayRef<SizePair> RangeSizes,
@@ -24633,6 +24618,37 @@ struct StoreChainContext {
       : Operands(Ops), RangeSizesStorage(RangeSizes),
         RangeSizesByIdx(RangeSizesByIdx) {}
 
+  // Set up initial values using the already set Operands
+  bool initializeContext(BoUpSLP &R, const DataLayout &DL,
+                         const TargetTransformInfo &TTI);
+  // Return the index of the first unvectorized store after \p StartIdx
+  unsigned getFirstUnvecStore(unsigned StartIdx = 0) const;
+  // Return the index of the first vectorized store after \p StartIdx
+  unsigned getFirstVecStoreAfter(unsigned StartIdx) const;
+  // Return true if all stores have been vectorized
+  bool allVectorized() const;
+  // Return true if all elements in the given range match \p TreeSize
+  bool isFirstSizeSameRange(unsigned StartIdx, unsigned Length,
+                            unsigned TreeSize) const;
+  // Return true if the \p TreeSize is profitable for all elements in the range
+  bool allOfRangeProfitable(unsigned StartIdx, unsigned Length,
+                            unsigned TreeSize) const;
+  // Update the live (first) range sizes from the cached values (second)
+  void updateRangeSizesFromCache();
+  // Update the cached (second) range sizes with the given \p TreeSize
+  void updateCachedRangeSizes(unsigned StartIdx, unsigned Length,
+                              unsigned TreeSize);
+  // Update CandidateVFs for secondary iterations
+  bool updateCandidateVFs(const TargetTransformInfo &TTI);
+  // Get the current VF
+  std::optional<unsigned> getCurrentVF() const;
+  // Remove the current VF from the queue
+  void incrementVF() { CandidateVFs.pop(); }
+  // Record vectorization of the provided range
+  void markRangeVectorized(unsigned StartIdx, unsigned Length,
+                           unsigned &FirstUnvecStore, unsigned &MaxSliceEnd);
+
+private:
   bool isNotVectorized(const SizePair &P) const {
     return P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] > 0;
   }
@@ -24653,101 +24669,21 @@ struct StoreChainContext {
     return Size == RangeSizesByIdx[P.first];
   }
 
-  // Return the index of the first unvectorized store after \p StartIdx
-  unsigned getFirstUnvecStore(unsigned StartIdx = 0) const {
-    return std::distance(
-        RangeSizes.begin(),
-        find_if(RangeSizes.drop_front(StartIdx), [this](const SizePair &P) {
-          return this->isNotVectorized(P);
-        }));
-  }
-
-  // Return the index of the first vectorized store after \p StartIdx
-  unsigned getFirstVecStoreAfter(unsigned StartIdx) const {
-    return std::distance(
-        RangeSizes.begin(),
-        find_if(RangeSizes.drop_front(StartIdx),
-                [this](const SizePair &P) { return this->isVectorized(P); }));
-  }
-
-  // Return true if all stores have been vectorized
-  bool allVectorized() const {
-    return all_of(RangeSizes,
-                  [this](const SizePair &P) { return this->isVectorized(P); });
-  }
-
-  // Return true if all elements in the given range match \p TreeSize
-  bool isFirstSizeSameRange(unsigned StartIdx, unsigned Length,
-                            unsigned TreeSize) const {
-    return all_of(RangeSizes.slice(StartIdx, Length),
-                  [TreeSize, this](const SizePair &P) {
-                    return firstSizeSame(TreeSize, P);
-                  });
-  }
-
-  // Return true if the \p TreeSize is profitable for all elements in the range
-  bool allOfRangeProfitable(unsigned StartIdx, unsigned Length,
-                            unsigned TreeSize) const {
-    return all_of(RangeSizes.slice(StartIdx, Length),
-                  [TreeSize, this](const SizePair &P) {
-                    return vfIsProfitable(TreeSize, P);
-                  });
-  }
-
-  // Update the live (first) range sizes from the cached values (second)
-  void updateRangeSizesFromCache() {
-    for (SizePair &P : RangeSizes) {
-      if (P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] != 0)
-        RangeSizesByIdx[P.first] = std::max(P.second, RangeSizesByIdx[P.first]);
-    }
-  }
-
-  // Update the cached (second) range sizes with the given \p TreeSize
-  void updateCachedRangeSizes(unsigned StartIdx, unsigned Length,
-                              unsigned TreeSize) {
-    for (SizePair &P : RangeSizes.slice(StartIdx, Length))
-      P.second = std::max(P.second, TreeSize);
-  }
-
-  // Update CandidateVFs for secondary iterations
-  bool updateCandidateVFs(const TargetTransformInfo &TTI) {
-    assert(CandidateVFs.empty() && "Did not use all VFs before refilling");
-    constexpr unsigned StoresLimit = 64;
-    const unsigned MaxTotalNum = std::min<unsigned>(
-        Operands.size(), static_cast<unsigned>(End - getFirstUnvecStore()));
-    unsigned VF = bit_ceil(ProbeVF) * 2;
-    if (VF > MaxTotalNum || VF >= StoresLimit)
-      return false;
-    // Attempt again to vectorize even larger chains if all previous
-    // attempts were unsuccessful because of the cost issues.
-    unsigned Limit =
-        getFloorFullVectorNumberOfElements(TTI, StoreTy, MaxTotalNum);
-    if (bit_floor(Limit) == VF && Limit != VF)
-      CandidateVFs.push(Limit);
-    CandidateVFs.push(VF);
-    ProbeVF = CandidateVFs.front();
-    ++Repeat;
-    RepeatChanged = false;
-    return true;
-  }
-
-  // Get the current VF
-  std::optional<unsigned> getCurrentVF() const {
-    if (Done || CandidateVFs.empty())
-      return std::nullopt;
-    return CandidateVFs.front();
-  }
-
-  // Increment the VF-Index counter, return false if at end of CandidateVFs
-  void incrementVF() { CandidateVFs.pop(); }
-
-  // Set up initial values using the already set Operands
-  bool initializeContext(BoUpSLP &R, const DataLayout &DL,
-                         const TargetTransformInfo &TTI);
-
-  // Record vectorization of the provided range
-  void markRangeVectorized(unsigned StartIdx, unsigned Length,
-                           unsigned &FirstUnvecStore, unsigned &MaxSliceEnd);
+  /// The largest VF checked in the current Repeat
+  unsigned ProbeVF = 0;
+  /// Type of the Stores in `Operands`
+  Type *StoreTy = nullptr;
+  /// Which VFs do we want to attempt for this chain
+  std::queue<unsigned> CandidateVFs;
+  /// Track the TreeSizes of prior vectorization attempts using each element,
+  /// to help us find early exit cases
+  /// - first: contains pointer into RangeSizesByIdx to help us track
+  /// vectorization of elements that belong to multiple chains
+  /// - second: contains cached TreeSize value for that element
+  SmallVector<SizePair> RangeSizesStorage;
+  /// RangeSize information for all elements in any chain
+  /// Needed since may be overlap between chains
+  SmallVector<unsigned> &RangeSizesByIdx;
 };
 
 void StoreChainContext::markRangeVectorized(unsigned StartIdx, unsigned Length,
@@ -24843,6 +24779,90 @@ bool StoreChainContext::initializeContext(BoUpSLP &R, const DataLayout &DL,
   ProbeVF = MaxVF;
   return true;
 }
+
+// Return the index of the first unvectorized store after \p StartIdx
+unsigned StoreChainContext::getFirstUnvecStore(unsigned StartIdx) const {
+  return std::distance(
+      RangeSizes.begin(),
+      find_if(RangeSizes.drop_front(StartIdx),
+              [this](const SizePair &P) { return this->isNotVectorized(P); }));
+}
+
+// Return the index of the first vectorized store after \p StartIdx
+unsigned StoreChainContext::getFirstVecStoreAfter(unsigned StartIdx) const {
+  return std::distance(
+      RangeSizes.begin(),
+      find_if(RangeSizes.drop_front(StartIdx),
+              [this](const SizePair &P) { return this->isVectorized(P); }));
+}
+
+// Return true if all stores have been vectorized
+bool StoreChainContext::allVectorized() const {
+  return all_of(RangeSizes,
+                [this](const SizePair &P) { return this->isVectorized(P); });
+}
+
+// Return true if all elements in the given range match \p TreeSize
+bool StoreChainContext::isFirstSizeSameRange(unsigned StartIdx, unsigned Length,
+                                             unsigned TreeSize) const {
+  return all_of(RangeSizes.slice(StartIdx, Length),
+                [TreeSize, this](const SizePair &P) {
+                  return firstSizeSame(TreeSize, P);
+                });
+}
+
+// Return true if the \p TreeSize is profitable for all elements in the range
+bool StoreChainContext::allOfRangeProfitable(unsigned StartIdx, unsigned Length,
+                                             unsigned TreeSize) const {
+  return all_of(RangeSizes.slice(StartIdx, Length),
+                [TreeSize, this](const SizePair &P) {
+                  return vfIsProfitable(TreeSize, P);
+                });
+}
+
+// Update the live (first) range sizes from the cached values (second)
+void StoreChainContext::updateRangeSizesFromCache() {
+  for (SizePair &P : RangeSizes) {
+    if (P.first != LocallyUnvectorizable && RangeSizesByIdx[P.first] != 0)
+      RangeSizesByIdx[P.first] = std::max(P.second, RangeSizesByIdx[P.first]);
+  }
+}
+
+// Update the cached (second) range sizes with the given \p TreeSize
+void StoreChainContext::updateCachedRangeSizes(unsigned StartIdx,
+                                               unsigned Length,
+                                               unsigned TreeSize) {
+  for (SizePair &P : RangeSizes.slice(StartIdx, Length))
+    P.second = std::max(P.second, TreeSize);
+}
+
+bool StoreChainContext::updateCandidateVFs(const TargetTransformInfo &TTI) {
+  assert(CandidateVFs.empty() && "Did not use all VFs before refilling");
+  constexpr unsigned StoresLimit = 64;
+  const unsigned MaxTotalNum = std::min<unsigned>(
+      Operands.size(), static_cast<unsigned>(End - getFirstUnvecStore()));
+  unsigned VF = bit_ceil(ProbeVF) * 2;
+  if (VF > MaxTotalNum || VF >= StoresLimit)
+    return false;
+  // Attempt again to vectorize even larger chains if all previous
+  // attempts were unsuccessful because of the cost issues.
+  unsigned Limit =
+      getFloorFullVectorNumberOfElements(TTI, StoreTy, MaxTotalNum);
+  if (bit_floor(Limit) == VF && Limit != VF)
+    CandidateVFs.push(Limit);
+  CandidateVFs.push(VF);
+  ProbeVF = CandidateVFs.front();
+  ++Repeat;
+  RepeatChanged = false;
+  return true;
+}
+
+// Get the current VF
+std::optional<unsigned> StoreChainContext::getCurrentVF() const {
+  if (Done || CandidateVFs.empty())
+    return std::nullopt;
+  return CandidateVFs.front();
+}
 } // namespace
 
 /// Checks if the quadratic mean deviation is less than 90% of the mean size.

>From 43790c74428b1813381aa9df415e4c092bf59675 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 15:46:19 -0700
Subject: [PATCH 13/16] Refactor to remove StoreChainContext::Done

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0f0b63ee2d0db..03492a4e5c47e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24600,9 +24600,6 @@ class StoreChainContext {
   unsigned Repeat = 1;
   /// Did any vectorization occur for the current iteration over CandidateVFs
   bool RepeatChanged = false;
-  /// Are we finished checking this StoreChainContext? Can be due to all VFs
-  /// being checked, or an early exit condition
-  bool Done = false;
   /// What element index is the end of the to be vectorized Operands
   /// i.e. Operands.size() == 16, and 12-15 were vectorized, then End == 12
   unsigned End = 0;
@@ -24743,7 +24740,6 @@ bool StoreChainContext::initializeContext(BoUpSLP &R, const DataLayout &DL,
     LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
                       << ") < "
                       << "MinVF (" << MinVF << ")\n");
-    Done = true;
     return false;
   }
 
@@ -24767,7 +24763,6 @@ bool StoreChainContext::initializeContext(BoUpSLP &R, const DataLayout &DL,
     LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
                       << ") < "
                       << "MinVF (" << MinVF << ")\n");
-    Done = true;
     return false;
   }
 
@@ -24859,7 +24854,7 @@ bool StoreChainContext::updateCandidateVFs(const TargetTransformInfo &TTI) {
 
 // Get the current VF
 std::optional<unsigned> StoreChainContext::getCurrentVF() const {
-  if (Done || CandidateVFs.empty())
+  if (CandidateVFs.empty())
     return std::nullopt;
   return CandidateVFs.front();
 }
@@ -25038,7 +25033,9 @@ bool SLPVectorizerPass::vectorizeStores(
     constexpr unsigned MaxAttempts = 4;
     for (unsigned LimitVF = GlobalMaxVF; LimitVF > 0;
          LimitVF = bit_ceil(LimitVF) / 2) {
-      for (const auto &CtxPtr : AllContexts) {
+      for (auto &CtxPtr : AllContexts) {
+        if (!CtxPtr)
+          break;
         StoreChainContext &Context = *CtxPtr;
         for (std::optional<unsigned> VFUnval = Context.getCurrentVF();
              VFUnval && *VFUnval >= LimitVF; VFUnval = Context.getCurrentVF()) {
@@ -25143,7 +25140,7 @@ bool SLPVectorizerPass::vectorizeStores(
           if (!Context.getCurrentVF()) {
             // All values vectorized - exit.
             if (Context.allVectorized()) {
-              Context.Done = true;
+              CtxPtr.reset();
               break;
             }
             // Check if tried all attempts or no need for the last attempts at
@@ -25151,12 +25148,12 @@ bool SLPVectorizerPass::vectorizeStores(
             if (Context.Repeat >= MaxAttempts ||
                 (Context.Repeat > 1 &&
                  (Context.RepeatChanged || !AnyProfitableGraph))) {
-              Context.Done = true;
+              CtxPtr.reset();
               break;
             }
 
             if (!Context.updateCandidateVFs(*TTI)) {
-              Context.Done = true;
+              CtxPtr.reset();
               break;
             }
             Context.updateRangeSizesFromCache();

>From 3099d36d628acab5caa3d0c87ddbf07616c23dab Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 16:19:03 -0700
Subject: [PATCH 14/16] Make checkTreeSizes part of StoreChainContext

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 03492a4e5c47e..476239cc3026e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24644,6 +24644,8 @@ class StoreChainContext {
   // Record vectorization of the provided range
   void markRangeVectorized(unsigned StartIdx, unsigned Length,
                            unsigned &FirstUnvecStore, unsigned &MaxSliceEnd);
+  bool checkTreeSizes(const unsigned SliceStartIdx,
+                      const unsigned VF) const;
 
 private:
   bool isNotVectorized(const SizePair &P) const {
@@ -24858,11 +24860,11 @@ std::optional<unsigned> StoreChainContext::getCurrentVF() const {
     return std::nullopt;
   return CandidateVFs.front();
 }
-} // namespace
 
 /// Checks if the quadratic mean deviation is less than 90% of the mean size.
-static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes,
-                           const SmallVector<unsigned> &RangeSizesByIdx) {
+bool StoreChainContext::checkTreeSizes(const unsigned SliceStartIdx,
+                                       const unsigned VF) const {
+  auto Sizes = RangeSizes.slice(SliceStartIdx, VF);
   unsigned Num = 0;
   uint64_t Sum = std::accumulate(
       Sizes.begin(), Sizes.end(), static_cast<uint64_t>(0),
@@ -24895,8 +24897,6 @@ static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes,
   return Dev * 96 / (Mean * Mean) == 0;
 }
 
-namespace {
-
 /// A group of stores that we'll try to bundle together using vector ops.
 /// They are ordered using the signed distance of their address operand to the
 /// address of this group's BaseInstr.
@@ -25053,8 +25053,7 @@ bool SLPVectorizerPass::vectorizeStores(
                 FirstVecStore >= Context.End ? Context.End : FirstVecStore;
             for (unsigned SliceStartIdx = FirstUnvecStore;
                  SliceStartIdx + VF <= MaxSliceEnd;) {
-              if (!checkTreeSizes(Context.RangeSizes.slice(SliceStartIdx, VF),
-                                  RangeSizesByIdx)) {
+              if (!Context.checkTreeSizes(SliceStartIdx, VF)) {
                 ++SliceStartIdx;
                 continue;
               }

>From e64daf224dba17366e82e06c43b456d026eb227e Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 16:27:30 -0700
Subject: [PATCH 15/16] Refactor to make RangeSizes private

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 476239cc3026e..6629a65dc0c4c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24605,7 +24605,6 @@ class StoreChainContext {
   unsigned End = 0;
   /// Stores that compose this chain
   BoUpSLP::ValueList Operands;
-  MutableArrayRef<SizePair> RangeSizes;
   /// Store information about failed vectorization attempts due to scheduling
   SmallDenseMap<Value *, SizePair> NonSchedulable;
 
@@ -24680,6 +24679,7 @@ class StoreChainContext {
   /// vectorization of elements that belong to multiple chains
   /// - second: contains cached TreeSize value for that element
   SmallVector<SizePair> RangeSizesStorage;
+  MutableArrayRef<SizePair> RangeSizes;
   /// RangeSize information for all elements in any chain
   /// Needed since may be overlap between chains
   SmallVector<unsigned> &RangeSizesByIdx;
@@ -25112,7 +25112,7 @@ bool SLPVectorizerPass::vectorizeStores(
                   Context.isFirstSizeSameRange(SliceStartIdx, VF, TreeSize)) {
                 SliceStartIdx += VF;
                 while (SliceStartIdx != MaxSliceEnd &&
-                       Context.RangeSizes[SliceStartIdx].first == TreeSize)
+                       Context.isFirstSizeSameRange(SliceStartIdx, 1, TreeSize))
                   ++SliceStartIdx;
                 continue;
               }

>From ce4335984c1fd9a1bd4ecac556cb987c40660a61 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 17 Mar 2026 16:56:30 -0700
Subject: [PATCH 16/16] Lint

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 6629a65dc0c4c..6d9f888a303f5 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24643,8 +24643,7 @@ class StoreChainContext {
   // Record vectorization of the provided range
   void markRangeVectorized(unsigned StartIdx, unsigned Length,
                            unsigned &FirstUnvecStore, unsigned &MaxSliceEnd);
-  bool checkTreeSizes(const unsigned SliceStartIdx,
-                      const unsigned VF) const;
+  bool checkTreeSizes(const unsigned SliceStartIdx, const unsigned VF) const;
 
 private:
   bool isNotVectorized(const SizePair &P) const {
@@ -24699,8 +24698,8 @@ void StoreChainContext::markRangeVectorized(unsigned StartIdx, unsigned Length,
     FirstUnvecStore = StartIdx + Length;
   }
   if (StartIdx + Length > MaxSliceEnd - MinVF) {
-    for (SizePair &P : RangeSizes.slice(
-             StartIdx + Length, MaxSliceEnd - (StartIdx + Length))) {
+    for (SizePair &P : RangeSizes.slice(StartIdx + Length,
+                                        MaxSliceEnd - (StartIdx + Length))) {
       P.first = LocallyUnvectorizable;
       P.second = 0;
     }



More information about the llvm-commits mailing list