[llvm] [SLPVectorizer][NFC] Avoid calling `calculateRtStride` twice. (PR #152359)
Mikhail Gudim via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 6 11:20:43 PDT 2025
https://github.com/mgudim created https://github.com/llvm/llvm-project/pull/152359
Before this patch, we called `calculateRtStride` once when checking whether a run-time strided load is legal, and a second time when generating the code.
Instead, this patch adds a `StrideSCEV` field to `TreeEntry`: when `calculateRtStride` returns `true`, this field is set to the SCEV of the run-time stride, so code generation can expand it directly instead of recomputing it.
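As a rough sketch of the new flow (simplified, with the SCEV arithmetic elided; the real code is in the patch below):

  // Analysis: compute the run-time stride once and report it through an
  // optional out-parameter instead of materializing IR.
  static bool calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
                                const DataLayout &DL, ScalarEvolution &SE,
                                SmallVectorImpl<unsigned> &SortedIndices,
                                const SCEV **StrideSCEV) {
    const SCEV *Stride = /* SCEV arithmetic over PointerOps, elided */ nullptr;
    if (!Stride || isa<SCEVConstant>(Stride))
      return false;
    if (StrideSCEV)
      *StrideSCEV = Stride; // The caller caches this on the TreeEntry.
    return true;
  }

  // Codegen (inside vectorizeTree, for a StridedVectorize entry E): expand
  // the cached SCEV instead of calling calculateRtStride a second time.
  SCEVExpander Expander(*SE, *DL, "strided-load-vec");
  Value *Stride = Expander.expandCodeFor(
      E->StrideSCEV, E->StrideSCEV->getType(), &*Builder.GetInsertPoint());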
From 3f47ccb1d265f2f3281e244ad14b1618a7704fcb Mon Sep 17 00:00:00 2001
From: Mikhail Gudim <mgudim at ventanamicro.com>
Date: Wed, 6 Aug 2025 09:11:07 -0700
Subject: [PATCH] [SLPVectorizer][NFC] Avoid calling `calculateRtStride` twice.
Before this patch, we called it once when checking whether a run-time strided
load is legal, and a second time when generating the code.
Instead, add a `StrideSCEV` field to `TreeEntry`: when `calculateRtStride`
returns `true`, this field is set to the SCEV of the run-time stride, so code
generation can expand it directly.
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 108 +++++++++---------
1 file changed, 53 insertions(+), 55 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5d0e2f9518a51..c4198159e039a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2199,6 +2199,7 @@ class BoUpSLP {
LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
SmallVectorImpl<unsigned> &Order,
SmallVectorImpl<Value *> &PointerOps,
+ const SCEV **StrideSCEV = nullptr,
unsigned *BestVF = nullptr,
bool TryRecursiveCheck = true) const;
@@ -3879,6 +3880,10 @@ class BoUpSLP {
};
EntryState State;
+  /// If we decide to generate a strided load for this Entry, `StrideSCEV`
+  /// will be set.
+ const SCEV *StrideSCEV = nullptr;
+
/// List of combined opcodes supported by the vectorizer.
enum CombinedOpcode {
NotCombinedOp = -1,
@@ -4430,11 +4435,10 @@ class BoUpSLP {
/// Checks if the specified list of the instructions/values can be vectorized
/// and fills required data before actual scheduling of the instructions.
- TreeEntry::EntryState
- getScalarsVectorizationState(const InstructionsState &S, ArrayRef<Value *> VL,
- bool IsScatterVectorizeUserTE,
- OrdersType &CurrentOrder,
- SmallVectorImpl<Value *> &PointerOps);
+ TreeEntry::EntryState getScalarsVectorizationState(
+ const InstructionsState &S, ArrayRef<Value *> VL,
+ bool IsScatterVectorizeUserTE, OrdersType &CurrentOrder,
+ SmallVectorImpl<Value *> &PointerOps, const SCEV **StrideSCEV);
/// Maps a specific scalar to its tree entry(ies).
SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarToTreeEntries;
@@ -5734,17 +5738,13 @@ static bool isReverseOrder(ArrayRef<unsigned> Order) {
}
/// Checks if the provided list of pointers \p Pointers represents the strided
-/// pointers for type ElemTy. If they are not, std::nullopt is returned.
-/// Otherwise, if \p Inst is not specified, just initialized optional value is
-/// returned to show that the pointers represent strided pointers. If \p Inst
-/// specified, the runtime stride is materialized before the given \p Inst.
-/// \returns std::nullopt if the pointers are not pointers with the runtime
-/// stride, nullptr or actual stride value, otherwise.
-static std::optional<Value *>
-calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
- const DataLayout &DL, ScalarEvolution &SE,
- SmallVectorImpl<unsigned> &SortedIndices,
- Instruction *Inst = nullptr) {
+/// pointers for type ElemTy. If they are not, `false` is returned.
+/// Otherwise, `true` is returned. If `StrideSCEV` is not nullptr,
+/// it is set to the SCEV of the run-time stride.
+static bool calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
+ const DataLayout &DL, ScalarEvolution &SE,
+ SmallVectorImpl<unsigned> &SortedIndices,
+ const SCEV **StrideSCEV) {
SmallVector<const SCEV *> SCEVs;
const SCEV *PtrSCEVLowest = nullptr;
const SCEV *PtrSCEVHighest = nullptr;
@@ -5753,7 +5753,7 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
for (Value *Ptr : PointerOps) {
const SCEV *PtrSCEV = SE.getSCEV(Ptr);
if (!PtrSCEV)
- return std::nullopt;
+ return false;
SCEVs.push_back(PtrSCEV);
if (!PtrSCEVLowest && !PtrSCEVHighest) {
PtrSCEVLowest = PtrSCEVHighest = PtrSCEV;
@@ -5761,14 +5761,14 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
}
const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVLowest);
if (isa<SCEVCouldNotCompute>(Diff))
- return std::nullopt;
+ return false;
if (Diff->isNonConstantNegative()) {
PtrSCEVLowest = PtrSCEV;
continue;
}
const SCEV *Diff1 = SE.getMinusSCEV(PtrSCEVHighest, PtrSCEV);
if (isa<SCEVCouldNotCompute>(Diff1))
- return std::nullopt;
+ return false;
if (Diff1->isNonConstantNegative()) {
PtrSCEVHighest = PtrSCEV;
continue;
@@ -5777,7 +5777,7 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
// Dist = PtrSCEVHighest - PtrSCEVLowest;
const SCEV *Dist = SE.getMinusSCEV(PtrSCEVHighest, PtrSCEVLowest);
if (isa<SCEVCouldNotCompute>(Dist))
- return std::nullopt;
+ return false;
int Size = DL.getTypeStoreSize(ElemTy);
auto TryGetStride = [&](const SCEV *Dist,
const SCEV *Multiplier) -> const SCEV * {
@@ -5798,10 +5798,10 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
const SCEV *Sz = SE.getConstant(Dist->getType(), Size * (SCEVs.size() - 1));
Stride = TryGetStride(Dist, Sz);
if (!Stride)
- return std::nullopt;
+ return false;
}
if (!Stride || isa<SCEVConstant>(Stride))
- return std::nullopt;
+ return false;
// Iterate through all pointers and check if all distances are
// unique multiple of Stride.
using DistOrdPair = std::pair<int64_t, int>;
@@ -5815,28 +5815,28 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVLowest);
const SCEV *Coeff = TryGetStride(Diff, Stride);
if (!Coeff)
- return std::nullopt;
+ return false;
const auto *SC = dyn_cast<SCEVConstant>(Coeff);
if (!SC || isa<SCEVCouldNotCompute>(SC))
- return std::nullopt;
+ return false;
if (!SE.getMinusSCEV(PtrSCEV, SE.getAddExpr(PtrSCEVLowest,
SE.getMulExpr(Stride, SC)))
->isZero())
- return std::nullopt;
+ return false;
Dist = SC->getAPInt().getZExtValue();
}
// If the strides are not the same or repeated, we can't vectorize.
if ((Dist / Size) * Size != Dist || (Dist / Size) >= SCEVs.size())
- return std::nullopt;
+ return false;
auto Res = Offsets.emplace(Dist, Cnt);
if (!Res.second)
- return std::nullopt;
+ return false;
// Consecutive order if the inserted element is the last one.
IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
++Cnt;
}
if (Offsets.size() != SCEVs.size())
- return std::nullopt;
+ return false;
SortedIndices.clear();
if (!IsConsecutive) {
// Fill SortedIndices array only if it is non-consecutive.
@@ -5847,10 +5847,9 @@ calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
++Cnt;
}
}
- if (!Inst)
- return nullptr;
- SCEVExpander Expander(SE, DL, "strided-load-vec");
- return Expander.expandCodeFor(Stride, Stride->getType(), Inst);
+ if (StrideSCEV)
+ *StrideSCEV = Stride;
+ return true;
}
static std::pair<InstructionCost, InstructionCost>
@@ -6246,11 +6245,10 @@ static bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
return false;
}
-BoUpSLP::LoadsState
-BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
- SmallVectorImpl<unsigned> &Order,
- SmallVectorImpl<Value *> &PointerOps,
- unsigned *BestVF, bool TryRecursiveCheck) const {
+BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
+ ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order,
+ SmallVectorImpl<Value *> &PointerOps, const SCEV **StrideSCEV,
+ unsigned *BestVF, bool TryRecursiveCheck) const {
// Check that a vectorized load would load the same memory as a scalar
// load. For example, we don't want to vectorize loads that are smaller
// than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
@@ -6289,7 +6287,7 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
if (!IsSorted) {
if (Sz > MinProfitableStridedLoads && TTI->isTypeLegal(VecTy)) {
if (TTI->isLegalStridedLoadStore(VecTy, CommonAlignment) &&
- calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order))
+ calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order, StrideSCEV))
return LoadsState::StridedVectorize;
}
@@ -6418,9 +6416,9 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
SmallVector<unsigned> Order;
SmallVector<Value *> PointerOps;
- LoadsState LS =
- canVectorizeLoads(Slice, Slice.front(), Order, PointerOps, BestVF,
- /*TryRecursiveCheck=*/false);
+ LoadsState LS = canVectorizeLoads(Slice, Slice.front(), Order,
+ PointerOps, StrideSCEV, BestVF,
+ /*TryRecursiveCheck=*/false);
// Check that the sorted loads are consecutive.
if (LS == LoadsState::Gather) {
if (BestVF) {
@@ -8565,8 +8563,9 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
// Try to build vector load.
ArrayRef<Value *> Values(
reinterpret_cast<Value *const *>(Slice.begin()), Slice.size());
- LoadsState LS = canVectorizeLoads(Values, Slice.front(), CurrentOrder,
- PointerOps, &BestVF);
+ LoadsState LS =
+ canVectorizeLoads(Values, Slice.front(), CurrentOrder, PointerOps,
+                          /*StrideSCEV=*/nullptr, &BestVF);
if (LS != LoadsState::Gather ||
(BestVF > 1 && static_cast<unsigned>(NumElts) == 2 * BestVF)) {
if (LS == LoadsState::ScatterVectorize) {
@@ -9171,7 +9170,7 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
const InstructionsState &S, ArrayRef<Value *> VL,
bool IsScatterVectorizeUserTE, OrdersType &CurrentOrder,
- SmallVectorImpl<Value *> &PointerOps) {
+ SmallVectorImpl<Value *> &PointerOps, const SCEV **StrideSCEV) {
assert(S.getMainOp() &&
"Expected instructions with same/alternate opcodes only.");
@@ -9273,7 +9272,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
});
});
};
- switch (canVectorizeLoads(VL, VL0, CurrentOrder, PointerOps)) {
+ switch (canVectorizeLoads(VL, VL0, CurrentOrder, PointerOps, StrideSCEV)) {
case LoadsState::Vectorize:
return TreeEntry::Vectorize;
case LoadsState::CompressVectorize:
@@ -10710,8 +10709,9 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
OrdersType CurrentOrder;
SmallVector<Value *> PointerOps;
+ const SCEV *StrideSCEV = nullptr;
TreeEntry::EntryState State = getScalarsVectorizationState(
- S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps);
+ S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps, &StrideSCEV);
if (State == TreeEntry::NeedToGather) {
newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
return;
@@ -10871,6 +10871,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
+ TE->StrideSCEV = StrideSCEV;
LLVM_DEBUG(dbgs() << "SLP: added a new TreeEntry (strided LoadInst).\n";
TE->dump());
break;
@@ -18711,16 +18712,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ConstantInt::get(StrideTy, (IsReverseOrder ? -1 : 1) * Stride *
DL->getTypeAllocSize(ScalarTy));
} else {
- SmallVector<Value *> PointerOps(E->Scalars.size(), nullptr);
- transform(E->Scalars, PointerOps.begin(), [](Value *V) {
- return cast<LoadInst>(V)->getPointerOperand();
- });
- OrdersType Order;
- std::optional<Value *> Stride =
- calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order,
- &*Builder.GetInsertPoint());
+ const SCEV *StrideSCEV = E->StrideSCEV;
+      assert(StrideSCEV && "Expected StrideSCEV to be set for strided loads");
+ SCEVExpander Expander(*SE, *DL, "strided-load-vec");
+ Value *Stride = Expander.expandCodeFor(
+ StrideSCEV, StrideSCEV->getType(), &*Builder.GetInsertPoint());
Value *NewStride =
- Builder.CreateIntCast(*Stride, StrideTy, /*isSigned=*/true);
+ Builder.CreateIntCast(Stride, StrideTy, /*isSigned=*/true);
StrideVal = Builder.CreateMul(
NewStride,
ConstantInt::get(
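In short, callers that only need a legality answer pass nullptr for the new
parameter, while `buildTreeRec` threads a local `StrideSCEV` through
`getScalarsVectorizationState` and stores it on the `TreeEntry`, roughly:

  // Legality-only query (tryToVectorizeGatheredLoads): no stride needed.
  LoadsState LS = canVectorizeLoads(Values, Slice.front(), CurrentOrder,
                                    PointerOps, /*StrideSCEV=*/nullptr, &BestVF);

  // Tree building (buildTreeRec): capture the stride for later codegen.
  const SCEV *StrideSCEV = nullptr;
  TreeEntry::EntryState State = getScalarsVectorizationState(
      S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps, &StrideSCEV);
  // ...
  TE->StrideSCEV = StrideSCEV;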