[llvm] Analyze constant stride (PR #160411)
Mikhail Gudim via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 23 15:46:40 PDT 2025
https://github.com/mgudim created https://github.com/llvm/llvm-project/pull/160411
>From f7aed52e5a809cc62a45d88a613b73bc1d61536c Mon Sep 17 00:00:00 2001
From: Mikhail Gudim <mgudim at ventanamicro.com>
Date: Tue, 23 Sep 2025 14:17:44 -0700
Subject: [PATCH 1/2] [SLPVectorizer] Change arguments of 'isStridedLoad' (NFC)
This reduces the diff for upcoming work on widening strided loads. It
also lets us reuse this function for the case when each pointer
represents the start of a group of contiguous loads.
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 41 ++++++-------------
1 file changed, 13 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1814d9a6811c0..4f9ec424da6d4 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2234,10 +2234,9 @@ class BoUpSLP {
/// TODO: If load combining is allowed in the IR optimizer, this analysis
/// may not be necessary.
bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const;
- bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
- ArrayRef<unsigned> Order, const TargetTransformInfo &TTI,
- const DataLayout &DL, ScalarEvolution &SE,
- const int64_t Diff, StridedPtrInfo &SPtrInfo) const;
+ bool isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+ Align Alignment, int64_t Diff, Value *Ptr0, Value *PtrN,
+ StridedPtrInfo &SPtrInfo) const;
/// Checks if the given array of loads can be represented as a vectorized,
/// scatter or just simple gather.
@@ -6817,13 +6816,10 @@ isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
/// 4. Any pointer operand is an instruction with the users outside of the
/// current graph (for masked gathers extra extractelement instructions
/// might be required).
-bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
- ArrayRef<unsigned> Order,
- const TargetTransformInfo &TTI,
- const DataLayout &DL, ScalarEvolution &SE,
- const int64_t Diff,
- StridedPtrInfo &SPtrInfo) const {
- const size_t Sz = VL.size();
+bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+ Align Alignment, int64_t Diff, Value *Ptr0,
+ Value *PtrN, StridedPtrInfo &SPtrInfo) const {
+ const size_t Sz = PointerOps.size();
if (Diff % (Sz - 1) != 0)
return false;
@@ -6835,7 +6831,6 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
});
const uint64_t AbsoluteDiff = std::abs(Diff);
- Type *ScalarTy = VL.front()->getType();
auto *VecTy = getWidenedType(ScalarTy, Sz);
if (IsAnyPointerUsedOutGraph ||
(AbsoluteDiff > Sz &&
@@ -6846,20 +6841,9 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
if (Diff != Stride * static_cast<int64_t>(Sz - 1))
return false;
- Align Alignment =
- cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
- ->getAlign();
- if (!TTI.isLegalStridedLoadStore(VecTy, Alignment))
+ if (!TTI->isLegalStridedLoadStore(VecTy, Alignment))
return false;
- Value *Ptr0;
- Value *PtrN;
- if (Order.empty()) {
- Ptr0 = PointerOps.front();
- PtrN = PointerOps.back();
- } else {
- Ptr0 = PointerOps[Order.front()];
- PtrN = PointerOps[Order.back()];
- }
+
// Iterate through all pointers and check if all distances are
// unique multiple of Dist.
SmallSet<int64_t, 4> Dists;
@@ -6868,14 +6852,14 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
if (Ptr == PtrN)
Dist = Diff;
else if (Ptr != Ptr0)
- Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+ Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
// If the strides are not the same or repeated, we can't
// vectorize.
if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
break;
}
if (Dists.size() == Sz) {
- Type *StrideTy = DL.getIndexType(Ptr0->getType());
+ Type *StrideTy = DL->getIndexType(Ptr0->getType());
SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
return true;
@@ -6964,7 +6948,8 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
cast<Instruction>(V), UserIgnoreList);
}))
return LoadsState::CompressVectorize;
- if (isStridedLoad(VL, PointerOps, Order, *TTI, *DL, *SE, *Diff, SPtrInfo))
+ if (isStridedLoad(PointerOps, ScalarTy, CommonAlignment, *Diff, Ptr0, PtrN,
+ SPtrInfo))
return LoadsState::StridedVectorize;
}
if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
>From 4e9d5bf6b74d6678d57e4a439926cb4fc0eb22f6 Mon Sep 17 00:00:00 2001
From: Mikhail Gudim <mgudim at ventanamicro.com>
Date: Tue, 23 Sep 2025 15:41:57 -0700
Subject: [PATCH 2/2] [SLPVectorizer] Widen constant strided loads.
Given a set of pointers, check if they can be rearranged as follows (%s is a constant):
%b + 0 * %s + 0
%b + 0 * %s + 1
%b + 0 * %s + 2
...
%b + 0 * %s + w
%b + 1 * %s + 0
%b + 1 * %s + 1
%b + 1 * %s + 2
...
%b + 1 * %s + w
...
If the pointers can be rearranged into the above pattern, the memory can be
accessed with strided loads of width `w` and stride `%s`.
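
For illustration only, here is a minimal standalone sketch of the offset check
described above. It assumes the offsets from the base %b (in elements) have
already been computed and sorted; the helper name matchGroupedStride and its
interface are hypothetical and not part of this patch, which performs the
analysis on pointer differences inside SLPVectorizer.cpp.

  #include <cstdint>
  #include <optional>
  #include <utility>
  #include <vector>

  // Check whether sorted element offsets split into equal groups of contiguous
  // elements whose starts are a constant stride apart. Returns {group width w,
  // stride between group starts} on success.
  std::optional<std::pair<unsigned, int64_t>>
  matchGroupedStride(const std::vector<int64_t> &Offsets) {
    const unsigned Sz = Offsets.size();
    if (Sz < 2)
      return std::nullopt;

    // Scan the first group: elements separated by a constant intra-group stride.
    const int64_t StrideWithinGroup = Offsets[1] - Offsets[0];
    unsigned GroupSize = 1;
    while (GroupSize < Sz &&
           Offsets[GroupSize] - Offsets[GroupSize - 1] == StrideWithinGroup)
      ++GroupSize;

    // The whole set is one group: a plain constant-strided access (w = 1).
    if (GroupSize == Sz)
      return std::make_pair(1u, StrideWithinGroup);

    // Otherwise groups must be contiguous inside and must tile the whole set.
    if (StrideWithinGroup != 1 || Sz % GroupSize != 0)
      return std::nullopt;

    const int64_t StrideBetweenGroups = Offsets[GroupSize] - Offsets[0];
    for (unsigned Start = 0; Start < Sz; Start += GroupSize) {
      if (Start != 0 &&
          Offsets[Start] - Offsets[Start - GroupSize] != StrideBetweenGroups)
        return std::nullopt;
      for (unsigned I = Start + 1; I < Start + GroupSize; ++I)
        if (Offsets[I] - Offsets[I - 1] != 1)
          return std::nullopt;
    }
    return std::make_pair(GroupSize, StrideBetweenGroups);
  }

For offsets {0, 1, 8, 9, 16, 17} this reports groups of width 2 whose starts
are 8 elements apart, which roughly corresponds to what the patch records in
StridedPtrInfo (a widened element type plus the stride between groups).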
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 156 ++++++++++++++----
1 file changed, 120 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4f9ec424da6d4..9f0278d445eca 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2235,8 +2235,28 @@ class BoUpSLP {
/// may not be necessary.
bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const;
bool isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
- Align Alignment, int64_t Diff, Value *Ptr0, Value *PtrN,
- StridedPtrInfo &SPtrInfo) const;
+ Align Alignment, int64_t Diff, size_t VecSz) const;
+
+ /// Given a set of pointers, check if they can be rearranged as follows (%s is a constant):
+ /// %b + 0 * %s + 0
+ /// %b + 0 * %s + 1
+ /// %b + 0 * %s + 2
+ /// ...
+ /// %b + 0 * %s + w
+ ///
+ /// %b + 1 * %s + 0
+ /// %b + 1 * %s + 1
+ /// %b + 1 * %s + 2
+ /// ...
+ /// %b + 1 * %s + w
+ /// ...
+ ///
+  /// If the pointers can be rearranged into the above pattern, the memory can
+  /// be accessed with strided loads of width `w` and stride `%s`.
+  bool analyzeConstantStrideCandidate(ArrayRef<Value *> PointerOps,
+                                      Type *ElemTy, Align CommonAlignment,
+                                      SmallVectorImpl<unsigned> &SortedIndices,
+                                      int64_t Diff, Value *Ptr0, Value *PtrN,
+                                      StridedPtrInfo &SPtrInfo) const;
/// Checks if the given array of loads can be represented as a vectorized,
/// scatter or just simple gather.
@@ -6817,12 +6837,7 @@ isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
/// current graph (for masked gathers extra extractelement instructions
/// might be required).
bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
- Align Alignment, int64_t Diff, Value *Ptr0,
- Value *PtrN, StridedPtrInfo &SPtrInfo) const {
- const size_t Sz = PointerOps.size();
- if (Diff % (Sz - 1) != 0)
- return false;
-
+ Align Alignment, int64_t Diff, size_t VecSz) const {
// Try to generate strided load node.
auto IsAnyPointerUsedOutGraph = any_of(PointerOps, [&](Value *V) {
return isa<Instruction>(V) && any_of(V->users(), [&](User *U) {
@@ -6831,41 +6846,110 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
});
const uint64_t AbsoluteDiff = std::abs(Diff);
- auto *VecTy = getWidenedType(ScalarTy, Sz);
+ auto *VecTy = getWidenedType(ScalarTy, VecSz);
if (IsAnyPointerUsedOutGraph ||
- (AbsoluteDiff > Sz &&
- (Sz > MinProfitableStridedLoads ||
- (AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
- AbsoluteDiff % Sz == 0 && has_single_bit(AbsoluteDiff / Sz)))) ||
- Diff == -(static_cast<int64_t>(Sz) - 1)) {
- int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
- if (Diff != Stride * static_cast<int64_t>(Sz - 1))
+ (AbsoluteDiff > VecSz &&
+ (VecSz > MinProfitableStridedLoads ||
+ (AbsoluteDiff <= MaxProfitableLoadStride * VecSz &&
+ AbsoluteDiff % VecSz == 0 && has_single_bit(AbsoluteDiff / VecSz)))) ||
+ Diff == -(static_cast<int64_t>(VecSz) - 1)) {
+ int64_t Stride = Diff / static_cast<int64_t>(VecSz - 1);
+ if (Diff != Stride * static_cast<int64_t>(VecSz - 1))
return false;
if (!TTI->isLegalStridedLoadStore(VecTy, Alignment))
return false;
+ }
+ return true;
+}
- // Iterate through all pointers and check if all distances are
- // unique multiple of Dist.
- SmallSet<int64_t, 4> Dists;
- for (Value *Ptr : PointerOps) {
- int64_t Dist = 0;
- if (Ptr == PtrN)
- Dist = Diff;
- else if (Ptr != Ptr0)
- Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
- // If the strides are not the same or repeated, we can't
- // vectorize.
- if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
+bool BoUpSLP::analyzeConstantStrideCandidate(
+    ArrayRef<Value *> PointerOps, Type *ElemTy,
+    Align CommonAlignment, SmallVectorImpl<unsigned> &SortedIndices,
+    int64_t Diff, Value *Ptr0, Value *PtrN,
+    StridedPtrInfo &SPtrInfo) const {
+ const unsigned Sz = PointerOps.size();
+ SmallVector<int64_t> SortedOffsetsFromBase;
+ SortedOffsetsFromBase.resize(Sz);
+ for (unsigned I : seq<unsigned>(Sz)) {
+ Value *Ptr =
+ SortedIndices.empty() ? PointerOps[I] : PointerOps[SortedIndices[I]];
+ SortedOffsetsFromBase[I] =
+ *getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, *DL, *SE);
+ }
+ assert(SortedOffsetsFromBase.size() > 1 &&
+ "Trying to generate strided load for less than 2 loads");
+  // Find where the first group ends.
+ int64_t StrideWithinGroup =
+ SortedOffsetsFromBase[1] - SortedOffsetsFromBase[0];
+ unsigned GroupSize = 1;
+ for (; GroupSize != SortedOffsetsFromBase.size(); ++GroupSize) {
+ if (SortedOffsetsFromBase[GroupSize] -
+ SortedOffsetsFromBase[GroupSize - 1] !=
+ StrideWithinGroup)
+ break;
+ }
+ unsigned VecSz = Sz;
+ Type *ScalarTy = ElemTy;
+ int64_t StrideIntVal = StrideWithinGroup;
+ FixedVectorType *StridedLoadTy = getWidenedType(ScalarTy, VecSz);
+
+ if (Sz != GroupSize) {
+ if (Sz % GroupSize != 0)
+ return false;
+ VecSz = Sz / GroupSize;
+
+ if (StrideWithinGroup != 1)
+ return false;
+ ScalarTy = Type::getIntNTy(SE->getContext(),
+ DL->getTypeSizeInBits(ElemTy).getFixedValue() *
+ GroupSize);
+ StridedLoadTy = getWidenedType(ScalarTy, VecSz);
+ if (!TTI->isTypeLegal(StridedLoadTy) ||
+ !TTI->isLegalStridedLoadStore(StridedLoadTy, CommonAlignment))
+ return false;
+
+ unsigned PrevGroupStartIdx = 0;
+ unsigned CurrentGroupStartIdx = GroupSize;
+ int64_t StrideBetweenGroups =
+ SortedOffsetsFromBase[GroupSize] - SortedOffsetsFromBase[0];
+ StrideIntVal = StrideBetweenGroups;
+ while (CurrentGroupStartIdx != Sz) {
+ if (SortedOffsetsFromBase[CurrentGroupStartIdx] -
+ SortedOffsetsFromBase[PrevGroupStartIdx] !=
+ StrideBetweenGroups)
break;
+ PrevGroupStartIdx = CurrentGroupStartIdx;
+ CurrentGroupStartIdx += GroupSize;
}
- if (Dists.size() == Sz) {
- Type *StrideTy = DL->getIndexType(Ptr0->getType());
- SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
- SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
- return true;
+ if (CurrentGroupStartIdx != Sz)
+ return false;
+
+ auto CheckGroup = [&](unsigned StartIdx, unsigned GroupSize0,
+ int64_t StrideWithinGroup) -> bool {
+ unsigned GroupEndIdx = StartIdx + 1;
+ for (; GroupEndIdx != Sz; ++GroupEndIdx) {
+ if (SortedOffsetsFromBase[GroupEndIdx] -
+ SortedOffsetsFromBase[GroupEndIdx - 1] !=
+ StrideWithinGroup)
+ break;
+ }
+ return GroupEndIdx - StartIdx == GroupSize0;
+ };
+ for (unsigned I = 0; I < Sz; I += GroupSize) {
+ if (!CheckGroup(I, GroupSize, StrideWithinGroup))
+ return false;
}
}
- return false;
+
+ if (!isStridedLoad(PointerOps, ScalarTy, CommonAlignment, Diff, VecSz))
+ return false;
+
+ Type *StrideTy = DL->getIndexType(Ptr0->getType());
+ SPtrInfo.StrideVal = ConstantInt::get(StrideTy, StrideIntVal);
+ SPtrInfo.Ty = StridedLoadTy;
+ return true;
}
BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
@@ -6948,8 +7032,8 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
cast<Instruction>(V), UserIgnoreList);
}))
return LoadsState::CompressVectorize;
- if (isStridedLoad(PointerOps, ScalarTy, CommonAlignment, *Diff, Ptr0, PtrN,
- SPtrInfo))
+ if (analyzeConstantStrideCandidate(PointerOps, ScalarTy, CommonAlignment,
+ Order, *Diff, Ptr0, PtrN, SPtrInfo))
return LoadsState::StridedVectorize;
}
if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
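
For context, here is a hypothetical C++ snippet (not taken from the patch or
its tests) whose loads form the pattern described above, assuming the loop is
fully unrolled before SLP runs:

  #include <cstdint>

  struct Pixel {
    int16_t R, G;    // two adjacent fields: a contiguous group of width 2
    int16_t Pad[6];  // padding makes consecutive groups 8 i16 elements apart
  };

  // The eight i16 loads below address P + i*8 + {0, 1} (in i16 elements) for
  // i = 0..3: groups of two contiguous elements with a constant stride of 8
  // between group starts. In principle, each pair could be widened to a single
  // i32 and the whole set loaded as one strided load of 4 x i32 rather than a
  // gather.
  int32_t SumRG4(const Pixel *P) {
    int32_t Sum = 0;
    for (int I = 0; I < 4; ++I)
      Sum += P[I].R + P[I].G;
    return Sum;
  }

Each iteration contributes a contiguous pair of i16 loads, and consecutive
pairs start a constant 8 elements apart, so the pointer set matches the
%b + i * %s + j shape that analyzeConstantStrideCandidate looks for.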