[llvm] c807395 - [LAA/SLP] Don't truncate APInt in getPointersDiff (#139941)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 15 02:08:10 PDT 2025
Author: Ramkumar Ramachandra
Date: 2025-05-15T10:08:05+01:00
New Revision: c807395011a027caae9ac196edfac328fb90443a
URL: https://github.com/llvm/llvm-project/commit/c807395011a027caae9ac196edfac328fb90443a
DIFF: https://github.com/llvm/llvm-project/commit/c807395011a027caae9ac196edfac328fb90443a.diff
LOG: [LAA/SLP] Don't truncate APInt in getPointersDiff (#139941)
Change getPointersDiff to return a std::optional<int64_t>, filling the
value using APInt::trySExtValue. This simple change requires updates to
other functions in LAA, and more substantial changes in SLPVectorizer to
widen types from 32-bit to 64-bit.
Fixes #139202.
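
For illustration only, a minimal sketch (not part of the patch, and the
helper name is made up) of the APInt accessor the fix switches to:
getSExtValue() asserts when the value needs more than 64 bits, whereas
trySExtValue() reports that case as std::nullopt, letting getPointersDiff
bail out instead of truncating.

#include "llvm/ADT/APInt.h"
#include <cstdint>
#include <optional>

// Hypothetical helper mirroring what the patched getPointersDiff does with
// the byte offset it computes; not an LLVM API.
static std::optional<int64_t> signedDistance(const llvm::APInt &Diff) {
  // Before the patch: `int Val = Diff.getSExtValue();` asserts (in asserts
  // builds) if Diff needs more than 64 bits, then narrows to 32 bits.
  // trySExtValue() instead returns std::nullopt for the >64-bit case.
  return Diff.trySExtValue();
}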
Added:
Modified:
llvm/include/llvm/Analysis/LoopAccessAnalysis.h
llvm/lib/Analysis/LoopAccessAnalysis.cpp
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/long-pointer-distance.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index f715e0ec8dbb4..fea2ede8b5ab4 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -853,11 +853,10 @@ getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
/// is a simple API that does not depend on the analysis pass.
/// \param StrictCheck Ensure that the calculated distance matches the
/// type-based one after all the bitcasts removal in the provided pointers.
-std::optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
- Value *PtrB, const DataLayout &DL,
- ScalarEvolution &SE,
- bool StrictCheck = false,
- bool CheckType = true);
+std::optional<int64_t>
+getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB,
+ const DataLayout &DL, ScalarEvolution &SE,
+ bool StrictCheck = false, bool CheckType = true);
/// Attempt to sort the pointers in \p VL and return the sorted indices
/// in \p SortedIndices, if reordering is required.
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index af1a3c593c514..ab407e945bc53 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1541,11 +1541,11 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
return std::nullopt;
}
-std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
- Type *ElemTyB, Value *PtrB,
- const DataLayout &DL,
- ScalarEvolution &SE, bool StrictCheck,
- bool CheckType) {
+std::optional<int64_t> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
+ Type *ElemTyB, Value *PtrB,
+ const DataLayout &DL,
+ ScalarEvolution &SE,
+ bool StrictCheck, bool CheckType) {
assert(PtrA && PtrB && "Expected non-nullptr pointers.");
// Make sure that A and B are different pointers.
@@ -1570,7 +1570,7 @@ std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
const Value *PtrB1 = PtrB->stripAndAccumulateConstantOffsets(
DL, OffsetB, /*AllowNonInbounds=*/true);
- int Val;
+ std::optional<int64_t> Val;
if (PtrA1 == PtrB1) {
// Retrieve the address space again as pointer stripping now tracks through
// `addrspacecast`.
@@ -1585,7 +1585,7 @@ std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
OffsetB = OffsetB.sextOrTrunc(IdxWidth);
OffsetB -= OffsetA;
- Val = OffsetB.getSExtValue();
+ Val = OffsetB.trySExtValue();
} else {
// Otherwise compute the distance with SCEV between the base pointers.
const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
@@ -1594,10 +1594,14 @@ std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
SE.computeConstantDifference(PtrSCEVB, PtrSCEVA);
if (!Diff)
return std::nullopt;
- Val = Diff->getSExtValue();
+ Val = Diff->trySExtValue();
}
- int Size = DL.getTypeStoreSize(ElemTyA);
- int Dist = Val / Size;
+
+ if (!Val)
+ return std::nullopt;
+
+ int64_t Size = DL.getTypeStoreSize(ElemTyA);
+ int64_t Dist = *Val / Size;
// Ensure that the calculated distance matches the type-based one after all
// the bitcasts removal in the provided pointers.
@@ -1616,14 +1620,15 @@ bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
// first pointer in the array.
Value *Ptr0 = VL[0];
- using DistOrdPair = std::pair<int64_t, int>;
+ using DistOrdPair = std::pair<int64_t, unsigned>;
auto Compare = llvm::less_first();
std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
Offsets.emplace(0, 0);
bool IsConsecutive = true;
for (auto [Idx, Ptr] : drop_begin(enumerate(VL))) {
- std::optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
- /*StrictCheck=*/true);
+ std::optional<int64_t> Diff =
+ getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
+ /*StrictCheck=*/true);
if (!Diff)
return false;
@@ -1654,7 +1659,7 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
return false;
Type *ElemTyA = getLoadStoreType(A);
Type *ElemTyB = getLoadStoreType(B);
- std::optional<int> Diff =
+ std::optional<int64_t> Diff =
getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
/*StrictCheck=*/true, CheckType);
return Diff && *Diff == 1;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d8cf6b82a197d..eb339282fdae8 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1584,7 +1584,7 @@ static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask,
/// before: 6 9 5 4 9 2 1 0
/// after: 6 3 5 4 7 2 1 0
static void fixupOrderingIndices(MutableArrayRef<unsigned> Order) {
- const unsigned Sz = Order.size();
+ const size_t Sz = Order.size();
SmallBitVector UnusedIndices(Sz, /*t=*/true);
SmallBitVector MaskedIndices(Sz);
for (unsigned I = 0; I < Sz; ++I) {
@@ -2216,7 +2216,7 @@ class BoUpSLP {
!LI2->isSimple())
return CheckSameEntryOrFail();
- std::optional<int> Dist = getPointersDiff(
+ std::optional<int64_t> Dist = getPointersDiff(
LI1->getType(), LI1->getPointerOperand(), LI2->getType(),
LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true);
if (!Dist || *Dist == 0) {
@@ -3619,9 +3619,10 @@ class BoUpSLP {
/// vector loads/masked gathers instead of regular gathers. Later these loads
/// are reshufled to build final gathered nodes.
void tryToVectorizeGatheredLoads(
- const SmallMapVector<std::tuple<BasicBlock *, Value *, Type *>,
- SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
- 8> &GatheredLoads);
+ const SmallMapVector<
+ std::tuple<BasicBlock *, Value *, Type *>,
+ SmallVector<SmallVector<std::pair<LoadInst *, int64_t>>>, 8>
+ &GatheredLoads);
/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
/// users of \p TE and collects the stores. It returns the map from the store
@@ -5368,7 +5369,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE,
// patterns.
SmallVector<Value *> GatheredScalars(TE.Scalars.begin(), TE.Scalars.end());
Type *ScalarTy = GatheredScalars.front()->getType();
- int NumScalars = GatheredScalars.size();
+ size_t NumScalars = GatheredScalars.size();
if (!isValidElementType(ScalarTy))
return std::nullopt;
auto *VecTy = getWidenedType(ScalarTy, NumScalars);
@@ -5442,7 +5443,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE,
unsigned Limit = getNumElems(CurrentOrder.size(), PartSz, I);
MutableArrayRef<unsigned> Slice = CurrentOrder.slice(I * PartSz, Limit);
// Shuffle of at least 2 vectors - ignore.
- if (any_of(Slice, [&](int I) { return I != NumScalars; })) {
+ if (any_of(Slice, [&](unsigned I) { return I != NumScalars; })) {
std::fill(Slice.begin(), Slice.end(), NumScalars);
ShuffledSubMasks.set(I);
continue;
@@ -5540,8 +5541,8 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE,
return std::max(Entries[I].front()->getVectorFactor(),
Entries[I].back()->getVectorFactor());
});
- int NumUndefs =
- count_if(CurrentOrder, [&](int Idx) { return Idx == NumScalars; });
+ unsigned NumUndefs =
+ count_if(CurrentOrder, [&](unsigned Idx) { return Idx == NumScalars; });
if (ShuffledSubMasks.all() || (NumScalars > 2 && NumUndefs >= NumScalars / 2))
return std::nullopt;
return std::move(CurrentOrder);
@@ -5868,7 +5869,11 @@ static bool buildCompressMask(ArrayRef<Value *> PointerOps,
Value *Ptr0 = Order.empty() ? PointerOps.front() : PointerOps[Order.front()];
for (unsigned I : seq<unsigned>(1, Sz)) {
Value *Ptr = Order.empty() ? PointerOps[I] : PointerOps[Order[I]];
- unsigned Pos = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+ std::optional<int64_t> OptPos =
+ getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+ if (!OptPos || OptPos > std::numeric_limits<unsigned>::max())
+ return false;
+ unsigned Pos = static_cast<unsigned>(*OptPos);
CompressMask[I] = Pos;
if (!Stride)
continue;
@@ -5894,7 +5899,7 @@ static bool isMaskedLoadCompress(
VectorType *&LoadVecTy) {
InterleaveFactor = 0;
Type *ScalarTy = VL.front()->getType();
- const unsigned Sz = VL.size();
+ const size_t Sz = VL.size();
auto *VecTy = getWidenedType(ScalarTy, Sz);
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
SmallVector<int> Mask;
@@ -5921,11 +5926,11 @@ static bool isMaskedLoadCompress(
Ptr0 = PointerOps[Order.front()];
PtrN = PointerOps[Order.back()];
}
- std::optional<int> Diff =
+ std::optional<int64_t> Diff =
getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE);
if (!Diff)
return false;
- const unsigned MaxRegSize =
+ const size_t MaxRegSize =
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
.getFixedValue();
// Check for very large distances between elements.
@@ -6051,9 +6056,10 @@ static bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
ArrayRef<unsigned> Order,
const TargetTransformInfo &TTI, const DataLayout &DL,
ScalarEvolution &SE,
- const bool IsAnyPointerUsedOutGraph, const int Diff) {
- const unsigned Sz = VL.size();
- const unsigned AbsoluteDiff = std::abs(Diff);
+ const bool IsAnyPointerUsedOutGraph,
+ const int64_t Diff) {
+ const size_t Sz = VL.size();
+ const uint64_t AbsoluteDiff = std::abs(Diff);
Type *ScalarTy = VL.front()->getType();
auto *VecTy = getWidenedType(ScalarTy, Sz);
if (IsAnyPointerUsedOutGraph ||
@@ -6061,9 +6067,9 @@ static bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
(Sz > MinProfitableStridedLoads ||
(AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
AbsoluteDiff % Sz == 0 && has_single_bit(AbsoluteDiff / Sz)))) ||
- Diff == -(static_cast<int>(Sz) - 1)) {
- int Stride = Diff / static_cast<int>(Sz - 1);
- if (Diff != Stride * static_cast<int>(Sz - 1))
+ Diff == -(static_cast<int64_t>(Sz) - 1)) {
+ int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
+ if (Diff != Stride * static_cast<int64_t>(Sz - 1))
return false;
Align Alignment =
cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
@@ -6081,9 +6087,9 @@ static bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
}
// Iterate through all pointers and check if all distances are
// unique multiple of Dist.
- SmallSet<int, 4> Dists;
+ SmallSet<int64_t, 4> Dists;
for (Value *Ptr : PointerOps) {
- int Dist = 0;
+ int64_t Dist = 0;
if (Ptr == PtrN)
Dist = Diff;
else if (Ptr != Ptr0)
@@ -6122,7 +6128,7 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
// Make sure all loads in the bundle are simple - we can't vectorize
// atomic or volatile loads.
PointerOps.clear();
- const unsigned Sz = VL.size();
+ const size_t Sz = VL.size();
PointerOps.resize(Sz);
auto *POIter = PointerOps.begin();
for (Value *V : VL) {
@@ -6165,10 +6171,10 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
Ptr0 = PointerOps[Order.front()];
PtrN = PointerOps[Order.back()];
}
- std::optional<int> Diff =
+ std::optional<int64_t> Diff =
getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
// Check that the sorted loads are consecutive.
- if (static_cast<unsigned>(*Diff) == Sz - 1)
+ if (static_cast<uint64_t>(*Diff) == Sz - 1)
return LoadsState::Vectorize;
if (isMaskedLoadCompress(VL, PointerOps, Order, *TTI, *DL, *SE, *AC, *DT,
*TLI, [&](Value *V) {
@@ -6427,8 +6433,9 @@ static bool clusterSortPtrAccesses(ArrayRef<Value *> VL,
// Map from bases to a vector of (Ptr, Offset, OrigIdx), which we insert each
// Ptr into, sort and return the sorted indices with values next to one
// another.
- SmallMapVector<std::pair<BasicBlock *, Value *>,
- SmallVector<SmallVector<std::tuple<Value *, int, unsigned>>>, 8>
+ SmallMapVector<
+ std::pair<BasicBlock *, Value *>,
+ SmallVector<SmallVector<std::tuple<Value *, int64_t, unsigned>>>, 8>
Bases;
Bases
.try_emplace(std::make_pair(
@@ -6441,10 +6448,10 @@ static bool clusterSortPtrAccesses(ArrayRef<Value *> VL,
getUnderlyingObject(Ptr, RecursionMaxDepth));
bool Found = any_of(Bases.try_emplace(Key).first->second,
[&, &Cnt = Cnt, &Ptr = Ptr](auto &Base) {
- std::optional<int> Diff = getPointersDiff(
- ElemTy, std::get<0>(Base.front()), ElemTy,
- Ptr, DL, SE,
- /*StrictCheck=*/true);
+ std::optional<int64_t> Diff =
+ getPointersDiff(ElemTy, std::get<0>(Base.front()),
+ ElemTy, Ptr, DL, SE,
+ /*StrictCheck=*/true);
if (!Diff)
return false;
@@ -6494,10 +6501,11 @@ static bool clusterSortPtrAccesses(ArrayRef<Value *> VL,
for (auto &Vec : Base.second) {
if (Vec.size() > 1) {
stable_sort(Vec, llvm::less_second());
- int InitialOffset = std::get<1>(Vec[0]);
+ int64_t InitialOffset = std::get<1>(Vec[0]);
bool AnyConsecutive =
all_of(enumerate(Vec), [InitialOffset](const auto &P) {
- return std::get<1>(P.value()) == int(P.index()) + InitialOffset;
+ return std::get<1>(P.value()) ==
+ int64_t(P.index()) + InitialOffset;
});
// Fill SortedIndices array only if it looks worth-while to sort the
// ptrs.
@@ -7007,7 +7015,7 @@ static void combineOrders(MutableArrayRef<unsigned> Order,
ArrayRef<unsigned> SecondaryOrder) {
assert((SecondaryOrder.empty() || Order.size() == SecondaryOrder.size()) &&
"Expected same size of orders");
- unsigned Sz = Order.size();
+ size_t Sz = Order.size();
SmallBitVector UsedIndices(Sz);
for (unsigned Idx : seq<unsigned>(0, Sz)) {
if (Order[Idx] != Sz)
@@ -7999,7 +8007,7 @@ BoUpSLP::collectUserStores(const BoUpSLP::TreeEntry *TE) const {
if (StoresVec.size() > Lane)
continue;
if (!StoresVec.empty()) {
- std::optional<int> Diff = getPointersDiff(
+ std::optional<int64_t> Diff = getPointersDiff(
SI->getValueOperand()->getType(), SI->getPointerOperand(),
SI->getValueOperand()->getType(),
StoresVec.front()->getPointerOperand(), *DL, *SE,
@@ -8027,14 +8035,14 @@ bool BoUpSLP::canFormVector(ArrayRef<StoreInst *> StoresVec,
// To avoid calling getPointersDiff() while sorting we create a vector of
// pairs {store, offset from first} and sort this instead.
- SmallVector<std::pair<int, unsigned>> StoreOffsetVec;
+ SmallVector<std::pair<int64_t, unsigned>> StoreOffsetVec;
StoreInst *S0 = StoresVec[0];
StoreOffsetVec.emplace_back(0, 0);
Type *S0Ty = S0->getValueOperand()->getType();
Value *S0Ptr = S0->getPointerOperand();
for (unsigned Idx : seq<unsigned>(1, StoresVec.size())) {
StoreInst *SI = StoresVec[Idx];
- std::optional<int> Diff =
+ std::optional<int64_t> Diff =
getPointersDiff(S0Ty, S0Ptr, SI->getValueOperand()->getType(),
SI->getPointerOperand(), *DL, *SE,
/*StrictCheck=*/true);
@@ -8046,7 +8054,7 @@ bool BoUpSLP::canFormVector(ArrayRef<StoreInst *> StoresVec,
return false;
sort(StoreOffsetVec, llvm::less_first());
unsigned Idx = 0;
- int PrevDist = 0;
+ int64_t PrevDist = 0;
for (const auto &P : StoreOffsetVec) {
if (Idx > 0 && P.first != PrevDist + 1)
return false;
@@ -8130,15 +8138,15 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots) {
static void gatherPossiblyVectorizableLoads(
const BoUpSLP &R, ArrayRef<Value *> VL, const DataLayout &DL,
ScalarEvolution &SE, const TargetTransformInfo &TTI,
- SmallVectorImpl<SmallVector<std::pair<LoadInst *, int>>> &GatheredLoads,
+ SmallVectorImpl<SmallVector<std::pair<LoadInst *, int64_t>>> &GatheredLoads,
bool AddNew = true) {
if (VL.empty())
return;
Type *ScalarTy = getValueType(VL.front());
if (!isValidElementType(ScalarTy))
return;
- SmallVector<SmallVector<std::pair<LoadInst *, int>>> ClusteredLoads;
- SmallVector<DenseMap<int, LoadInst *>> ClusteredDistToLoad;
+ SmallVector<SmallVector<std::pair<LoadInst *, int64_t>>> ClusteredLoads;
+ SmallVector<DenseMap<int64_t, LoadInst *>> ClusteredDistToLoad;
for (Value *V : VL) {
auto *LI = dyn_cast<LoadInst>(V);
if (!LI)
@@ -8154,7 +8162,7 @@ static void gatherPossiblyVectorizableLoads(
RecursionMaxDepth) &&
"Expected loads with the same type, same parent and same "
"underlying pointer.");
- std::optional<int> Dist = getPointersDiff(
+ std::optional<int64_t> Dist = getPointersDiff(
LI->getType(), LI->getPointerOperand(), Data.front().first->getType(),
Data.front().first->getPointerOperand(), DL, SE,
/*StrictCheck=*/true);
@@ -8176,11 +8184,11 @@ static void gatherPossiblyVectorizableLoads(
}
}
auto FindMatchingLoads =
- [&](ArrayRef<std::pair<LoadInst *, int>> Loads,
- SmallVectorImpl<SmallVector<std::pair<LoadInst *, int>>>
+ [&](ArrayRef<std::pair<LoadInst *, int64_t>> Loads,
+ SmallVectorImpl<SmallVector<std::pair<LoadInst *, int64_t>>>
&GatheredLoads,
SetVector<unsigned> &ToAdd, SetVector<unsigned> &Repeated,
- int &Offset, unsigned &Start) {
+ int64_t &Offset, unsigned &Start) {
if (Loads.empty())
return GatheredLoads.end();
LoadInst *LI = Loads.front().first;
@@ -8191,16 +8199,16 @@ static void gatherPossiblyVectorizableLoads(
if (LI->getParent() != Data.front().first->getParent() ||
LI->getType() != Data.front().first->getType())
continue;
- std::optional<int> Dist =
+ std::optional<int64_t> Dist =
getPointersDiff(LI->getType(), LI->getPointerOperand(),
Data.front().first->getType(),
Data.front().first->getPointerOperand(), DL, SE,
/*StrictCheck=*/true);
if (!Dist)
continue;
- SmallSet<int, 4> DataDists;
+ SmallSet<int64_t, 4> DataDists;
SmallPtrSet<LoadInst *, 4> DataLoads;
- for (std::pair<LoadInst *, int> P : Data) {
+ for (std::pair<LoadInst *, int64_t> P : Data) {
DataDists.insert(P.second);
DataLoads.insert(P.first);
}
@@ -8231,10 +8239,10 @@ static void gatherPossiblyVectorizableLoads(
ToAdd.clear();
return GatheredLoads.end();
};
- for (ArrayRef<std::pair<LoadInst *, int>> Data : ClusteredLoads) {
+ for (ArrayRef<std::pair<LoadInst *, int64_t>> Data : ClusteredLoads) {
unsigned Start = 0;
SetVector<unsigned> ToAdd, LocalToAdd, Repeated;
- int Offset = 0;
+ int64_t Offset = 0;
auto *It = FindMatchingLoads(Data, GatheredLoads, LocalToAdd, Repeated,
Offset, Start);
while (It != GatheredLoads.end()) {
@@ -8249,7 +8257,7 @@ static void gatherPossiblyVectorizableLoads(
return !ToAdd.contains(Idx) && !Repeated.contains(Idx);
})) {
auto AddNewLoads =
- [&](SmallVectorImpl<std::pair<LoadInst *, int>> &Loads) {
+ [&](SmallVectorImpl<std::pair<LoadInst *, int64_t>> &Loads) {
for (unsigned Idx : seq<unsigned>(Data.size())) {
if (ToAdd.contains(Idx) || Repeated.contains(Idx))
continue;
@@ -8259,7 +8267,7 @@ static void gatherPossiblyVectorizableLoads(
if (!AddNew) {
LoadInst *LI = Data.front().first;
It = find_if(
- GatheredLoads, [&](ArrayRef<std::pair<LoadInst *, int>> PD) {
+ GatheredLoads, [&](ArrayRef<std::pair<LoadInst *, int64_t>> PD) {
return PD.front().first->getParent() == LI->getParent() &&
PD.front().first->getType() == LI->getType();
});
@@ -8267,7 +8275,7 @@ static void gatherPossiblyVectorizableLoads(
AddNewLoads(*It);
It = std::find_if(
std::next(It), GatheredLoads.end(),
- [&](ArrayRef<std::pair<LoadInst *, int>> PD) {
+ [&](ArrayRef<std::pair<LoadInst *, int64_t>> PD) {
return PD.front().first->getParent() == LI->getParent() &&
PD.front().first->getType() == LI->getType();
});
@@ -8280,9 +8288,10 @@ static void gatherPossiblyVectorizableLoads(
}
void BoUpSLP::tryToVectorizeGatheredLoads(
- const SmallMapVector<std::tuple<BasicBlock *, Value *, Type *>,
- SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
- 8> &GatheredLoads) {
+ const SmallMapVector<
+ std::tuple<BasicBlock *, Value *, Type *>,
+ SmallVector<SmallVector<std::pair<LoadInst *, int64_t>>>, 8>
+ &GatheredLoads) {
GatheredLoadsEntriesFirst = VectorizableTree.size();
SmallVector<SmallPtrSet<const Value *, 4>> LoadSetsToVectorize(
@@ -8291,8 +8300,8 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
Set.insert_range(VectorizableTree[Idx]->Scalars);
// Sort loads by distance.
- auto LoadSorter = [](const std::pair<LoadInst *, int> &L1,
- const std::pair<LoadInst *, int> &L2) {
+ auto LoadSorter = [](const std::pair<LoadInst *, int64_t> &L1,
+ const std::pair<LoadInst *, int64_t> &L2) {
return L1.second > L2.second;
};
@@ -8454,28 +8463,30 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
};
auto ProcessGatheredLoads =
[&, &TTI = *TTI](
- ArrayRef<SmallVector<std::pair<LoadInst *, int>>> GatheredLoads,
+ ArrayRef<SmallVector<std::pair<LoadInst *, int64_t>>> GatheredLoads,
bool Final = false) {
SmallVector<LoadInst *> NonVectorized;
- for (ArrayRef<std::pair<LoadInst *, int>> LoadsDists : GatheredLoads) {
+ for (ArrayRef<std::pair<LoadInst *, int64_t>> LoadsDists :
+ GatheredLoads) {
if (LoadsDists.size() <= 1) {
NonVectorized.push_back(LoadsDists.back().first);
continue;
}
- SmallVector<std::pair<LoadInst *, int>> LocalLoadsDists(LoadsDists);
+ SmallVector<std::pair<LoadInst *, int64_t>> LocalLoadsDists(
+ LoadsDists);
SmallVector<LoadInst *> OriginalLoads(make_first_range(LoadsDists));
stable_sort(LocalLoadsDists, LoadSorter);
SmallVector<LoadInst *> Loads;
unsigned MaxConsecutiveDistance = 0;
unsigned CurrentConsecutiveDist = 1;
- int LastDist = LocalLoadsDists.front().second;
+ int64_t LastDist = LocalLoadsDists.front().second;
bool AllowMaskedGather = IsMaskedGatherSupported(OriginalLoads);
- for (const std::pair<LoadInst *, int> &L : LocalLoadsDists) {
+ for (const std::pair<LoadInst *, int64_t> &L : LocalLoadsDists) {
if (isVectorized(L.first))
continue;
assert(LastDist >= L.second &&
"Expected first distance always not less than second");
- if (static_cast<unsigned>(LastDist - L.second) ==
+ if (static_cast<uint64_t>(LastDist - L.second) ==
CurrentConsecutiveDist) {
++CurrentConsecutiveDist;
MaxConsecutiveDistance =
@@ -8698,12 +8709,12 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
if (!Ref.empty() && !NonVectorized.empty() &&
std::accumulate(
Ref.begin(), Ref.end(), 0u,
- [](unsigned S,
- ArrayRef<std::pair<LoadInst *, int>> LoadsDists) -> unsigned {
- return S + LoadsDists.size();
- }) != NonVectorized.size() &&
+ [](unsigned S, ArrayRef<std::pair<LoadInst *, int64_t>> LoadsDists)
+ -> unsigned { return S + LoadsDists.size(); }) !=
+ NonVectorized.size() &&
IsMaskedGatherSupported(NonVectorized)) {
- SmallVector<SmallVector<std::pair<LoadInst *, int>>> FinalGatheredLoads;
+ SmallVector<SmallVector<std::pair<LoadInst *, int64_t>>>
+ FinalGatheredLoads;
for (LoadInst *LI : NonVectorized) {
// Reinsert non-vectorized loads to other list of loads with the same
// base pointers.
@@ -9299,10 +9310,10 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
Ptr0 = PointerOps[CurrentOrder.front()];
PtrN = PointerOps[CurrentOrder.back()];
}
- std::optional<int> Dist =
+ std::optional<int64_t> Dist =
getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
// Check that the sorted pointer operands are consecutive.
- if (static_cast<unsigned>(*Dist) == VL.size() - 1)
+ if (static_cast<uint64_t>(*Dist) == VL.size() - 1)
return TreeEntry::Vectorize;
}
@@ -10751,7 +10762,7 @@ unsigned BoUpSLP::canMapToVector(Type *T) const {
if (!isValidElementType(EltTy))
return 0;
- uint64_t VTSize = DL->getTypeStoreSizeInBits(getWidenedType(EltTy, N));
+ size_t VTSize = DL->getTypeStoreSizeInBits(getWidenedType(EltTy, N));
if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize ||
VTSize != DL->getTypeStoreSizeInBits(T))
return 0;
@@ -11950,7 +11961,7 @@ void BoUpSLP::transformNodes() {
// A list of loads to be gathered during the vectorization process. We can
// try to vectorize them at the end, if profitable.
SmallMapVector<std::tuple<BasicBlock *, Value *, Type *>,
- SmallVector<SmallVector<std::pair<LoadInst *, int>>>, 8>
+ SmallVector<SmallVector<std::pair<LoadInst *, int64_t>>>, 8>
GatheredLoads;
for (std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
@@ -18270,12 +18281,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *Ptr0 = cast<LoadInst>(E->Scalars.front())->getPointerOperand();
Value *PtrN = cast<LoadInst>(E->Scalars.back())->getPointerOperand();
PO = IsReverseOrder ? PtrN : Ptr0;
- std::optional<int> Diff = getPointersDiff(
+ std::optional<int64_t> Diff = getPointersDiff(
VL0->getType(), Ptr0, VL0->getType(), PtrN, *DL, *SE);
Type *StrideTy = DL->getIndexType(PO->getType());
Value *StrideVal;
if (Diff) {
- int Stride = *Diff / (static_cast<int>(E->Scalars.size()) - 1);
+ int64_t Stride =
+ *Diff / (static_cast<int64_t>(E->Scalars.size()) - 1);
StrideVal =
ConstantInt::get(StrideTy, (IsReverseOrder ? -1 : 1) * Stride *
DL->getTypeAllocSize(ScalarTy));
@@ -21133,18 +21145,18 @@ class RelatedStoreInsts {
/// \p PtrDist.
/// Does nothing if there is already a store with that \p PtrDist.
/// \returns The previously associated Instruction index, or std::nullopt
- std::optional<unsigned> insertOrLookup(unsigned InstrIdx, int PtrDist) {
+ std::optional<unsigned> insertOrLookup(unsigned InstrIdx, int64_t PtrDist) {
auto [It, Inserted] = Instrs.emplace(PtrDist, InstrIdx);
- return Inserted ? std::nullopt : std::optional<unsigned>(It->second);
+ return Inserted ? std::nullopt : std::make_optional(It->second);
}
- using DistToInstMap = std::map<int, unsigned>;
+ using DistToInstMap = std::map<int64_t, unsigned>;
const DistToInstMap &getStores() const { return Instrs; }
/// If \p SI is related to this group of stores, return the distance of its
/// pointer operand to the one the group's BaseInstr.
- std::optional<int> getPointerDiff(StoreInst &SI, const DataLayout &DL,
- ScalarEvolution &SE) const {
+ std::optional<int64_t> getPointerDiff(StoreInst &SI, const DataLayout &DL,
+ ScalarEvolution &SE) const {
StoreInst &BaseStore = *AllStores[BaseInstrIdx];
return getPointersDiff(
BaseStore.getValueOperand()->getType(), BaseStore.getPointerOperand(),
@@ -21155,7 +21167,7 @@ class RelatedStoreInsts {
/// Recompute the pointer distances to be based on \p NewBaseInstIdx.
/// Stores whose index is less than \p MinSafeIdx will be dropped.
void rebase(unsigned MinSafeIdx, unsigned NewBaseInstIdx,
- int DistFromCurBase) {
+ int64_t DistFromCurBase) {
DistToInstMap PrevSet = std::move(Instrs);
reset(NewBaseInstIdx);
@@ -21171,7 +21183,7 @@ class RelatedStoreInsts {
/// Remove all stores that have been vectorized from this group.
void clearVectorizedStores(const BoUpSLP::ValueSet &VectorizedStores) {
DistToInstMap::reverse_iterator LastVectorizedStore = find_if(
- reverse(Instrs), [&](const std::pair<int, unsigned> &DistAndIdx) {
+ reverse(Instrs), [&](const std::pair<int64_t, unsigned> &DistAndIdx) {
return VectorizedStores.contains(AllStores[DistAndIdx.second]);
});
@@ -21204,7 +21216,7 @@ bool SLPVectorizerPass::vectorizeStores(
bool Changed = false;
auto TryToVectorize = [&](const RelatedStoreInsts::DistToInstMap &StoreSeq) {
- int PrevDist = -1;
+ int64_t PrevDist = -1;
BoUpSLP::ValueList Operands;
// Collect the chain into a list.
for (auto [Idx, Data] : enumerate(StoreSeq)) {
@@ -21505,7 +21517,7 @@ bool SLPVectorizerPass::vectorizeStores(
// dependencies and no need to waste compile time to try to vectorize them.
// - Try to vectorize the sequence {1, {1, 0}, {3, 2}}.
auto FillStoresSet = [&](unsigned Idx, StoreInst *SI) {
- std::optional<int> PtrDist;
+ std::optional<int64_t> PtrDist;
auto *RelatedStores = find_if(
SortedStores, [&PtrDist, SI, this](const RelatedStoreInsts &StoreSeq) {
PtrDist = StoreSeq.getPointerDiff(*SI, *DL, *SE);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/long-pointer-distance.ll b/llvm/test/Transforms/SLPVectorizer/X86/long-pointer-distance.ll
index 9cfafd2784488..f663d120b136a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/long-pointer-distance.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/long-pointer-distance.ll
@@ -5,7 +5,13 @@ define void @test(ptr %this) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[THIS:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: store <4 x i64> <i64 1, i64 2, i64 3, i64 4>, ptr [[THIS]], align 8
+; CHECK-NEXT: store i64 1, ptr [[THIS]], align 8
+; CHECK-NEXT: [[B:%.*]] = getelementptr i8, ptr [[THIS]], i64 8
+; CHECK-NEXT: store i64 2, ptr [[B]], align 8
+; CHECK-NEXT: [[C:%.*]] = getelementptr i8, ptr [[THIS]], i64 4294967312
+; CHECK-NEXT: store i64 3, ptr [[C]], align 8
+; CHECK-NEXT: [[D:%.*]] = getelementptr i8, ptr [[THIS]], i64 4294967320
+; CHECK-NEXT: store i64 4, ptr [[D]], align 8
; CHECK-NEXT: ret void
;
entry: