[llvm] Analyze constant stride (PR #160411)

Mikhail Gudim via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 23 15:46:40 PDT 2025


https://github.com/mgudim created https://github.com/llvm/llvm-project/pull/160411

None

From f7aed52e5a809cc62a45d88a613b73bc1d61536c Mon Sep 17 00:00:00 2001
From: Mikhail Gudim <mgudim at ventanamicro.com>
Date: Tue, 23 Sep 2025 14:17:44 -0700
Subject: [PATCH 1/2] [SLPVectorizer] Change arguments of 'isStridedLoad' (NFC)

This reduces the diff for future work on widening strided loads. It also
lets us reuse this function for the case when each pointer represents the
start of a group of contiguous loads.
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 41 ++++++-------------
 1 file changed, 13 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1814d9a6811c0..4f9ec424da6d4 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2234,10 +2234,9 @@ class BoUpSLP {
   /// TODO: If load combining is allowed in the IR optimizer, this analysis
   ///       may not be necessary.
   bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const;
-  bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
-                     ArrayRef<unsigned> Order, const TargetTransformInfo &TTI,
-                     const DataLayout &DL, ScalarEvolution &SE,
-                     const int64_t Diff, StridedPtrInfo &SPtrInfo) const;
+  bool isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+                     Align Alignment, int64_t Diff, Value *Ptr0, Value *PtrN,
+                     StridedPtrInfo &SPtrInfo) const;
 
   /// Checks if the given array of loads can be represented as a vectorized,
   /// scatter or just simple gather.
@@ -6817,13 +6816,10 @@ isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
 /// 4. Any pointer operand is an instruction with the users outside of the
 /// current graph (for masked gathers extra extractelement instructions
 /// might be required).
-bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
-                            ArrayRef<unsigned> Order,
-                            const TargetTransformInfo &TTI,
-                            const DataLayout &DL, ScalarEvolution &SE,
-                            const int64_t Diff,
-                            StridedPtrInfo &SPtrInfo) const {
-  const size_t Sz = VL.size();
+bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+                            Align Alignment, int64_t Diff, Value *Ptr0,
+                            Value *PtrN, StridedPtrInfo &SPtrInfo) const {
+  const size_t Sz = PointerOps.size();
   if (Diff % (Sz - 1) != 0)
     return false;
 
@@ -6835,7 +6831,6 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
   });
 
   const uint64_t AbsoluteDiff = std::abs(Diff);
-  Type *ScalarTy = VL.front()->getType();
   auto *VecTy = getWidenedType(ScalarTy, Sz);
   if (IsAnyPointerUsedOutGraph ||
       (AbsoluteDiff > Sz &&
@@ -6846,20 +6841,9 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
     int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
     if (Diff != Stride * static_cast<int64_t>(Sz - 1))
       return false;
-    Align Alignment =
-        cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
-            ->getAlign();
-    if (!TTI.isLegalStridedLoadStore(VecTy, Alignment))
+    if (!TTI->isLegalStridedLoadStore(VecTy, Alignment))
       return false;
-    Value *Ptr0;
-    Value *PtrN;
-    if (Order.empty()) {
-      Ptr0 = PointerOps.front();
-      PtrN = PointerOps.back();
-    } else {
-      Ptr0 = PointerOps[Order.front()];
-      PtrN = PointerOps[Order.back()];
-    }
+
     // Iterate through all pointers and check if all distances are
     // unique multiple of Dist.
     SmallSet<int64_t, 4> Dists;
@@ -6868,14 +6852,14 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
       if (Ptr == PtrN)
         Dist = Diff;
       else if (Ptr != Ptr0)
-        Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+        Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
       // If the strides are not the same or repeated, we can't
       // vectorize.
       if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
         break;
     }
     if (Dists.size() == Sz) {
-      Type *StrideTy = DL.getIndexType(Ptr0->getType());
+      Type *StrideTy = DL->getIndexType(Ptr0->getType());
       SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
       SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
       return true;
@@ -6964,7 +6948,8 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
                                    cast<Instruction>(V), UserIgnoreList);
                              }))
       return LoadsState::CompressVectorize;
-    if (isStridedLoad(VL, PointerOps, Order, *TTI, *DL, *SE, *Diff, SPtrInfo))
+    if (isStridedLoad(PointerOps, ScalarTy, CommonAlignment, *Diff, Ptr0, PtrN,
+                      SPtrInfo))
       return LoadsState::StridedVectorize;
   }
   if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||

From 4e9d5bf6b74d6678d57e4a439926cb4fc0eb22f6 Mon Sep 17 00:00:00 2001
From: Mikhail Gudim <mgudim at ventanamicro.com>
Date: Tue, 23 Sep 2025 15:41:57 -0700
Subject: [PATCH 2/2] [SLPVectorizer] Widen constant strided loads.

Given a set of pointers, check if they can be rearranged as follows (%s is a constant):
%b + 0 * %s + 0
%b + 0 * %s + 1
%b + 0 * %s + 2
...
%b + 0 * %s + w

%b + 1 * %s + 0
%b + 1 * %s + 1
%b + 1 * %s + 2
...
%b + 1 * %s + w
...

If the pointers can be rearranged in the above pattern, the memory can be
accessed with strided loads of width `w` and stride `%s`.
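
As an illustration, below is a minimal standalone C++ sketch of this grouping
analysis. It is not part of the patch: the offsets are invented and assumed to
be the sorted element offsets from the base pointer %b (the patch derives them
from PointerOps with getPointersDiff and additionally checks legality via TTI).

  // Standalone sketch of the grouping analysis described above.
  #include <cstdint>
  #include <cstdio>
  #include <vector>

  int main() {
    // Eight i32 loads: four groups of two contiguous elements, with a
    // constant stride of 10 elements between group starts, i.e.
    //   %b + i * 10 + j   for i in [0, 3], j in [0, 1].
    std::vector<int64_t> Offsets = {0, 1, 10, 11, 20, 21, 30, 31};

    // Find where the first group ends: the initial run with stride 1.
    unsigned GroupSize = 1;
    while (GroupSize != Offsets.size() &&
           Offsets[GroupSize] - Offsets[GroupSize - 1] == 1)
      ++GroupSize;

    // The sketch only handles several equally sized groups.
    bool Viable =
        GroupSize < Offsets.size() && Offsets.size() % GroupSize == 0;
    // Stride between groups = distance between consecutive group starts.
    int64_t Stride = Viable ? Offsets[GroupSize] - Offsets[0] : 0;

    // Each later group must start Stride elements after the previous one
    // and must itself be contiguous.
    for (unsigned I = GroupSize; Viable && I < Offsets.size(); I += GroupSize) {
      Viable &= Offsets[I] - Offsets[I - GroupSize] == Stride;
      for (unsigned J = 1; J < GroupSize; ++J)
        Viable &= Offsets[I + J] - Offsets[I + J - 1] == 1;
    }

    // Prints: GroupSize = 2, stride = 10, viable = 1. For i32 elements this
    // corresponds to a strided load of 4 x i64, one i64 per group.
    printf("GroupSize = %u, stride = %lld, viable = %d\n", GroupSize,
           (long long)Stride, Viable ? 1 : 0);
    return 0;
  }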
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 156 ++++++++++++++----
 1 file changed, 120 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4f9ec424da6d4..9f0278d445eca 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2235,8 +2235,28 @@ class BoUpSLP {
   ///       may not be necessary.
   bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const;
   bool isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
-                     Align Alignment, int64_t Diff, Value *Ptr0, Value *PtrN,
-                     StridedPtrInfo &SPtrInfo) const;
+                     Align Alignment, int64_t Diff, size_t VecSz) const;
+
+  /// Check if the pointers can be rearranged as follows (%s is a constant):
+  ///  %b + 0 * %s + 0
+  ///  %b + 0 * %s + 1
+  ///  %b + 0 * %s + 2
+  ///  ...
+  ///  %b + 0 * %s + w
+  ///
+  ///  %b + 1 * %s + 0
+  ///  %b + 1 * %s + 1
+  ///  %b + 1 * %s + 2
+  ///  ...
+  ///  %b + 1 * %s + w
+  ///  ...
+  ///
+  ///  If the pointers can be rearranged in the above pattern, the memory can
+  ///  be accessed with strided loads of width `w` and stride `%s`.
+  bool analyzeConstantStrideCandidate(
+      ArrayRef<Value *> PointerOps, Type *ElemTy, Align CommonAlignment,
+      SmallVectorImpl<unsigned> &SortedIndices, int64_t Diff, Value *Ptr0,
+      Value *PtrN, StridedPtrInfo &SPtrInfo) const;
 
   /// Checks if the given array of loads can be represented as a vectorized,
   /// scatter or just simple gather.
@@ -6817,12 +6837,7 @@ isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
 /// current graph (for masked gathers extra extractelement instructions
 /// might be required).
 bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
-                            Align Alignment, int64_t Diff, Value *Ptr0,
-                            Value *PtrN, StridedPtrInfo &SPtrInfo) const {
-  const size_t Sz = PointerOps.size();
-  if (Diff % (Sz - 1) != 0)
-    return false;
-
+                            Align Alignment, int64_t Diff, size_t VecSz) const {
   // Try to generate strided load node.
   auto IsAnyPointerUsedOutGraph = any_of(PointerOps, [&](Value *V) {
     return isa<Instruction>(V) && any_of(V->users(), [&](User *U) {
@@ -6831,41 +6846,110 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
   });
 
   const uint64_t AbsoluteDiff = std::abs(Diff);
-  auto *VecTy = getWidenedType(ScalarTy, Sz);
+  auto *VecTy = getWidenedType(ScalarTy, VecSz);
   if (IsAnyPointerUsedOutGraph ||
-      (AbsoluteDiff > Sz &&
-       (Sz > MinProfitableStridedLoads ||
-        (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
-         AbsoluteDiff % Sz == 0 && has_single_bit(AbsoluteDiff / Sz)))) ||
-      Diff == -(static_cast<int64_t>(Sz) - 1)) {
-    int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
-    if (Diff != Stride * static_cast<int64_t>(Sz - 1))
+      (AbsoluteDiff > VecSz &&
+       (VecSz > MinProfitableStridedLoads ||
+        (AbsoluteDiff <= MaxProfitableLoadStride * VecSz &&
+         AbsoluteDiff % VecSz == 0 && has_single_bit(AbsoluteDiff / VecSz)))) ||
+      Diff == -(static_cast<int64_t>(VecSz) - 1)) {
+    int64_t Stride = Diff / static_cast<int64_t>(VecSz - 1);
+    if (Diff != Stride * static_cast<int64_t>(VecSz - 1))
       return false;
     if (!TTI->isLegalStridedLoadStore(VecTy, Alignment))
       return false;
+  }
+  return true;
+}
 
-    // Iterate through all pointers and check if all distances are
-    // unique multiple of Dist.
-    SmallSet<int64_t, 4> Dists;
-    for (Value *Ptr : PointerOps) {
-      int64_t Dist = 0;
-      if (Ptr == PtrN)
-        Dist = Diff;
-      else if (Ptr != Ptr0)
-        Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
-      // If the strides are not the same or repeated, we can't
-      // vectorize.
-      if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
+bool BoUpSLP::analyzeConstantStrideCandidate(
+    ArrayRef<Value *> PointerOps, Type *ElemTy, Align CommonAlignment,
+    SmallVectorImpl<unsigned> &SortedIndices, int64_t Diff,
+    Value *Ptr0, Value *PtrN,
+    StridedPtrInfo &SPtrInfo) const {
+  const unsigned Sz = PointerOps.size();
+  SmallVector<int64_t> SortedOffsetsFromBase;
+  SortedOffsetsFromBase.resize(Sz);
+  for (unsigned I : seq<unsigned>(Sz)) {
+    Value *Ptr =
+        SortedIndices.empty() ? PointerOps[I] : PointerOps[SortedIndices[I]];
+    SortedOffsetsFromBase[I] =
+        *getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, *DL, *SE);
+  }
+  assert(SortedOffsetsFromBase.size() > 1 &&
+         "Trying to generate strided load for less than 2 loads");
+
+  // Find where the first group ends.
+  int64_t StrideWithinGroup =
+      SortedOffsetsFromBase[1] - SortedOffsetsFromBase[0];
+  unsigned GroupSize = 1;
+  for (; GroupSize != SortedOffsetsFromBase.size(); ++GroupSize) {
+    if (SortedOffsetsFromBase[GroupSize] -
+            SortedOffsetsFromBase[GroupSize - 1] !=
+        StrideWithinGroup)
+      break;
+  }
+  unsigned VecSz = Sz;
+  Type *ScalarTy = ElemTy;
+  int64_t StrideIntVal = StrideWithinGroup;
+  FixedVectorType *StridedLoadTy = getWidenedType(ScalarTy, VecSz);
+
+  if (Sz != GroupSize) {
+    if (Sz % GroupSize != 0)
+      return false;
+    VecSz = Sz / GroupSize;
+
+    if (StrideWithinGroup != 1)
+      return false;
+    // Widen the element type so each contiguous group becomes one element.
+    ScalarTy = Type::getIntNTy(SE->getContext(),
+                               DL->getTypeSizeInBits(ElemTy).getFixedValue() *
+                                   GroupSize);
+    StridedLoadTy = getWidenedType(ScalarTy, VecSz);
+    if (!TTI->isTypeLegal(StridedLoadTy) ||
+        !TTI->isLegalStridedLoadStore(StridedLoadTy, CommonAlignment))
+      return false;
+
+    unsigned PrevGroupStartIdx = 0;
+    unsigned CurrentGroupStartIdx = GroupSize;
+    int64_t StrideBetweenGroups =
+        SortedOffsetsFromBase[GroupSize] - SortedOffsetsFromBase[0];
+    StrideIntVal = StrideBetweenGroups;
+    while (CurrentGroupStartIdx != Sz) {
+      if (SortedOffsetsFromBase[CurrentGroupStartIdx] -
+              SortedOffsetsFromBase[PrevGroupStartIdx] !=
+          StrideBetweenGroups)
         break;
+      PrevGroupStartIdx = CurrentGroupStartIdx;
+      CurrentGroupStartIdx += GroupSize;
     }
-    if (Dists.size() == Sz) {
-      Type *StrideTy = DL->getIndexType(Ptr0->getType());
-      SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
-      SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
-      return true;
+    if (CurrentGroupStartIdx != Sz)
+      return false;
+
+    auto CheckGroup = [&](unsigned StartIdx, unsigned GroupSize0,
+                          int64_t StrideWithinGroup) -> bool {
+      unsigned GroupEndIdx = StartIdx + 1;
+      for (; GroupEndIdx != Sz; ++GroupEndIdx) {
+        if (SortedOffsetsFromBase[GroupEndIdx] -
+                SortedOffsetsFromBase[GroupEndIdx - 1] !=
+            StrideWithinGroup)
+          break;
+      }
+      return GroupEndIdx - StartIdx == GroupSize0;
+    };
+    for (unsigned I = 0; I < Sz; I += GroupSize) {
+      if (!CheckGroup(I, GroupSize, StrideWithinGroup))
+        return false;
     }
   }
-  return false;
+
+  if (!isStridedLoad(PointerOps, ScalarTy, CommonAlignment, Diff, VecSz))
+    return false;
+
+  Type *StrideTy = DL->getIndexType(Ptr0->getType());
+  SPtrInfo.StrideVal = ConstantInt::get(StrideTy, StrideIntVal);
+  SPtrInfo.Ty = StridedLoadTy;
+  return true;
 }
 
 BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
@@ -6948,8 +7032,8 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
                                    cast<Instruction>(V), UserIgnoreList);
                              }))
       return LoadsState::CompressVectorize;
-    if (isStridedLoad(PointerOps, ScalarTy, CommonAlignment, *Diff, Ptr0, PtrN,
-                      SPtrInfo))
+    if (analyzeConstantStrideCandidate(PointerOps, ScalarTy, CommonAlignment,
+                                       Order, *Diff, Ptr0, PtrN, SPtrInfo))
       return LoadsState::StridedVectorize;
   }
   if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||


