[llvm] [LAA/SLP] Don't truncate APInt in getPointersDiff (PR #139941)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 14 11:11:20 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-analysis
Author: Ramkumar Ramachandra (artagnon)
Changes:
Change getPointersDiff to return a std::optional<int64_t>, and fill this value using APInt::trySExtValue. This simple change requires corresponding updates to other functions in LAA, and more substantial changes in SLPVectorizer, widening several types from 32-bit to 64-bit.
Fixes #139202.
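For context, here is a minimal standalone sketch (not part of the patch) of the behavior the change relies on: APInt::trySExtValue returns std::nullopt when the value does not fit in a signed 64-bit integer, instead of asserting like getSExtValue, and the previous code additionally narrowed the result into a 32-bit `int`. The snippet assumes an LLVM development environment with the APInt header available and LLVM libraries to link against.

```cpp
// Sketch only: illustrates why getPointersDiff now goes through trySExtValue
// and keeps the result as int64_t rather than truncating it to int.
#include "llvm/ADT/APInt.h"
#include <cstdint>
#include <cstdio>
#include <optional>

using namespace llvm;

int main() {
  // A 128-bit offset whose value needs more than 64 bits to represent.
  APInt Huge = APInt::getOneBitSet(/*numBits=*/128, /*BitNo=*/80);

  // trySExtValue() reports failure instead of asserting, so a caller like
  // getPointersDiff can simply return std::nullopt for oversized distances.
  if (std::optional<int64_t> Val = Huge.trySExtValue())
    printf("fits in int64_t: %lld\n", (long long)*Val);
  else
    printf("distance does not fit in int64_t; give up\n");

  // The old pattern, by contrast, asserted on values wider than 64 bits and
  // then silently truncated anything above 32 bits:
  //   int Val = Offset.getSExtValue();
  return 0;
}
```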
---
Patch is 48.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139941.diff
4 Files Affected:
- (modified) llvm/include/llvm/Analysis/LoopAccessAnalysis.h (+5-6)
- (modified) llvm/lib/Analysis/LoopAccessAnalysis.cpp (+20-15)
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+136-122)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/long-pointer-distance.ll (+7-1)
``````````diff
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index f715e0ec8dbb4..108f39f84ad2f 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -853,11 +853,10 @@ getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
/// is a simple API that does not depend on the analysis pass.
/// \param StrictCheck Ensure that the calculated distance matches the
/// type-based one after all the bitcasts removal in the provided pointers.
-std::optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
- Value *PtrB, const DataLayout &DL,
- ScalarEvolution &SE,
- bool StrictCheck = false,
- bool CheckType = true);
+std::optional<int64_t>
+getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB,
+ const DataLayout &DL, ScalarEvolution &SE,
+ bool StrictCheck = false, bool CheckType = true);
/// Attempt to sort the pointers in \p VL and return the sorted indices
/// in \p SortedIndices, if reordering is required.
@@ -871,7 +870,7 @@ std::optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
/// \p SortedIndices as <1,2,0,3>
bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
ScalarEvolution &SE,
- SmallVectorImpl<unsigned> &SortedIndices);
+ SmallVectorImpl<uint64_t> &SortedIndices);
/// Returns true if the memory operations \p A and \p B are consecutive.
/// This is a simple API that does not depend on the analysis pass.
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index af1a3c593c514..a56603c6e0125 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1541,11 +1541,11 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
return std::nullopt;
}
-std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
- Type *ElemTyB, Value *PtrB,
- const DataLayout &DL,
- ScalarEvolution &SE, bool StrictCheck,
- bool CheckType) {
+std::optional<int64_t> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
+ Type *ElemTyB, Value *PtrB,
+ const DataLayout &DL,
+ ScalarEvolution &SE,
+ bool StrictCheck, bool CheckType) {
assert(PtrA && PtrB && "Expected non-nullptr pointers.");
// Make sure that A and B are different pointers.
@@ -1570,7 +1570,7 @@ std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
const Value *PtrB1 = PtrB->stripAndAccumulateConstantOffsets(
DL, OffsetB, /*AllowNonInbounds=*/true);
- int Val;
+ std::optional<int64_t> Val;
if (PtrA1 == PtrB1) {
// Retrieve the address space again as pointer stripping now tracks through
// `addrspacecast`.
@@ -1585,7 +1585,7 @@ std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
OffsetB = OffsetB.sextOrTrunc(IdxWidth);
OffsetB -= OffsetA;
- Val = OffsetB.getSExtValue();
+ Val = OffsetB.trySExtValue();
} else {
// Otherwise compute the distance with SCEV between the base pointers.
const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
@@ -1594,10 +1594,14 @@ std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
SE.computeConstantDifference(PtrSCEVB, PtrSCEVA);
if (!Diff)
return std::nullopt;
- Val = Diff->getSExtValue();
+ Val = Diff->trySExtValue();
}
- int Size = DL.getTypeStoreSize(ElemTyA);
- int Dist = Val / Size;
+
+ if (!Val)
+ return std::nullopt;
+
+ int64_t Size = DL.getTypeStoreSize(ElemTyA);
+ int64_t Dist = *Val / Size;
// Ensure that the calculated distance matches the type-based one after all
// the bitcasts removal in the provided pointers.
@@ -1608,7 +1612,7 @@ std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
const DataLayout &DL, ScalarEvolution &SE,
- SmallVectorImpl<unsigned> &SortedIndices) {
+ SmallVectorImpl<uint64_t> &SortedIndices) {
assert(llvm::all_of(
VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
"Expected list of pointer operands.");
@@ -1616,14 +1620,15 @@ bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
// first pointer in the array.
Value *Ptr0 = VL[0];
- using DistOrdPair = std::pair<int64_t, int>;
+ using DistOrdPair = std::pair<int64_t, uint64_t>;
auto Compare = llvm::less_first();
std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
Offsets.emplace(0, 0);
bool IsConsecutive = true;
for (auto [Idx, Ptr] : drop_begin(enumerate(VL))) {
- std::optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
- /*StrictCheck=*/true);
+ std::optional<int64_t> Diff =
+ getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
+ /*StrictCheck=*/true);
if (!Diff)
return false;
@@ -1654,7 +1659,7 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
return false;
Type *ElemTyA = getLoadStoreType(A);
Type *ElemTyB = getLoadStoreType(B);
- std::optional<int> Diff =
+ std::optional<int64_t> Diff =
getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
/*StrictCheck=*/true, CheckType);
return Diff && *Diff == 1;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 45cf4e1eac092..518fe5b43f0cb 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1583,8 +1583,8 @@ static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask,
/// values 3 and 7 respectively:
/// before: 6 9 5 4 9 2 1 0
/// after: 6 3 5 4 7 2 1 0
-static void fixupOrderingIndices(MutableArrayRef<unsigned> Order) {
- const unsigned Sz = Order.size();
+static void fixupOrderingIndices(MutableArrayRef<uint64_t> Order) {
+ const uint64_t Sz = Order.size();
SmallBitVector UnusedIndices(Sz, /*t=*/true);
SmallBitVector MaskedIndices(Sz);
for (unsigned I = 0; I < Sz; ++I) {
@@ -1636,7 +1636,7 @@ static SmallVector<Constant *> replicateMask(ArrayRef<Constant *> Val,
namespace llvm {
-static void inversePermutation(ArrayRef<unsigned> Indices,
+static void inversePermutation(ArrayRef<uint64_t> Indices,
SmallVectorImpl<int> &Mask) {
Mask.clear();
const unsigned E = Indices.size();
@@ -1766,7 +1766,7 @@ class BoUpSLP {
using ValueSet = SmallPtrSet<Value *, 16>;
using StoreList = SmallVector<StoreInst *, 8>;
using ExtraValueToDebugLocsMap = SmallDenseSet<Value *, 4>;
- using OrdersType = SmallVector<unsigned, 4>;
+ using OrdersType = SmallVector<uint64_t, 4>;
BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
TargetLibraryInfo *TLi, AAResults *Aa, LoopInfo *Li,
@@ -1923,7 +1923,7 @@ class BoUpSLP {
/// should be represented as an empty order, so this is used to
/// decide if we can canonicalize a computed order. Undef elements
/// (represented as size) are ignored.
- static bool isIdentityOrder(ArrayRef<unsigned> Order) {
+ static bool isIdentityOrder(ArrayRef<uint64_t> Order) {
assert(!Order.empty() && "expected non-empty order");
const unsigned Sz = Order.size();
return all_of(enumerate(Order), [&](const auto &P) {
@@ -2056,7 +2056,7 @@ class BoUpSLP {
/// \param TryRecursiveCheck used to check if long masked gather can be
/// represented as a serie of loads/insert subvector, if profitable.
LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
- SmallVectorImpl<unsigned> &Order,
+ SmallVectorImpl<uint64_t> &Order,
SmallVectorImpl<Value *> &PointerOps,
unsigned *BestVF = nullptr,
bool TryRecursiveCheck = true) const;
@@ -2216,7 +2216,7 @@ class BoUpSLP {
!LI2->isSimple())
return CheckSameEntryOrFail();
- std::optional<int> Dist = getPointersDiff(
+ std::optional<int64_t> Dist = getPointersDiff(
LI1->getType(), LI1->getPointerOperand(), LI2->getType(),
LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true);
if (!Dist || *Dist == 0) {
@@ -3503,7 +3503,7 @@ class BoUpSLP {
/// \param ResizeAllowed indicates whether it is allowed to handle subvector
/// extract order.
bool canReuseExtract(ArrayRef<Value *> VL,
- SmallVectorImpl<unsigned> &CurrentOrder,
+ SmallVectorImpl<uint64_t> &CurrentOrder,
bool ResizeAllowed = false) const;
/// Vectorize a single entry in the tree.
@@ -3619,9 +3619,10 @@ class BoUpSLP {
/// vector loads/masked gathers instead of regular gathers. Later these loads
/// are reshufled to build final gathered nodes.
void tryToVectorizeGatheredLoads(
- const SmallMapVector<std::tuple<BasicBlock *, Value *, Type *>,
- SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
- 8> &GatheredLoads);
+ const SmallMapVector<
+ std::tuple<BasicBlock *, Value *, Type *>,
+ SmallVector<SmallVector<std::pair<LoadInst *, int64_t>>>, 8>
+ &GatheredLoads);
/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
/// users of \p TE and collects the stores. It returns the map from the store
@@ -3788,7 +3789,7 @@ class BoUpSLP {
SmallVector<int, 4> ReuseShuffleIndices;
/// Does this entry require reordering?
- SmallVector<unsigned, 4> ReorderIndices;
+ SmallVector<uint64_t, 4> ReorderIndices;
/// Points back to the VectorizableTree.
///
@@ -4025,7 +4026,7 @@ class BoUpSLP {
dbgs() << ReuseIdx << ", ";
dbgs() << "\n";
dbgs() << "ReorderIndices: ";
- for (unsigned ReorderIdx : ReorderIndices)
+ for (uint64_t ReorderIdx : ReorderIndices)
dbgs() << ReorderIdx << ", ";
dbgs() << "\n";
dbgs() << "UserTreeIndex: ";
@@ -4074,7 +4075,7 @@ class BoUpSLP {
const InstructionsState &S,
const EdgeInfo &UserTreeIdx,
ArrayRef<int> ReuseShuffleIndices = {},
- ArrayRef<unsigned> ReorderIndices = {},
+ ArrayRef<uint64_t> ReorderIndices = {},
unsigned InterleaveFactor = 0) {
TreeEntry::EntryState EntryState =
Bundle ? TreeEntry::Vectorize : TreeEntry::NeedToGather;
@@ -4090,7 +4091,7 @@ class BoUpSLP {
ScheduleBundle &Bundle, const InstructionsState &S,
const EdgeInfo &UserTreeIdx,
ArrayRef<int> ReuseShuffleIndices = {},
- ArrayRef<unsigned> ReorderIndices = {}) {
+ ArrayRef<uint64_t> ReorderIndices = {}) {
assert(((!Bundle && (EntryState == TreeEntry::NeedToGather ||
EntryState == TreeEntry::SplitVectorize)) ||
(Bundle && EntryState != TreeEntry::NeedToGather &&
@@ -4122,7 +4123,7 @@ class BoUpSLP {
// Reorder scalars and build final mask.
Last->Scalars.assign(VL.size(), nullptr);
transform(ReorderIndices, Last->Scalars.begin(),
- [VL](unsigned Idx) -> Value * {
+ [VL](uint64_t Idx) -> Value * {
if (Idx >= VL.size())
return UndefValue::get(VL.front()->getType());
return VL[Idx];
@@ -5316,12 +5317,12 @@ static void reorderReuses(SmallVectorImpl<int> &Reuses, ArrayRef<int> Mask) {
/// the original order of the scalars. Procedure transforms the provided order
/// in accordance with the given \p Mask. If the resulting \p Order is just an
/// identity order, \p Order is cleared.
-static void reorderOrder(SmallVectorImpl<unsigned> &Order, ArrayRef<int> Mask,
+static void reorderOrder(SmallVectorImpl<uint64_t> &Order, ArrayRef<int> Mask,
bool BottomOrder = false) {
assert(!Mask.empty() && "Expected non-empty mask.");
unsigned Sz = Mask.size();
if (BottomOrder) {
- SmallVector<unsigned> PrevOrder;
+ SmallVector<uint64_t> PrevOrder;
if (Order.empty()) {
PrevOrder.resize(Sz);
std::iota(PrevOrder.begin(), PrevOrder.end(), 0);
@@ -5368,7 +5369,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE,
// patterns.
SmallVector<Value *> GatheredScalars(TE.Scalars.begin(), TE.Scalars.end());
Type *ScalarTy = GatheredScalars.front()->getType();
- int NumScalars = GatheredScalars.size();
+ uint64_t NumScalars = GatheredScalars.size();
if (!isValidElementType(ScalarTy))
return std::nullopt;
auto *VecTy = getWidenedType(ScalarTy, NumScalars);
@@ -5430,7 +5431,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE,
(GatherShuffles.empty() && IsSplatMask(ExtractMask)))
return std::nullopt;
SmallBitVector ShuffledSubMasks(NumParts);
- auto TransformMaskToOrder = [&](MutableArrayRef<unsigned> CurrentOrder,
+ auto TransformMaskToOrder = [&](MutableArrayRef<uint64_t> CurrentOrder,
ArrayRef<int> Mask, int PartSz, int NumParts,
function_ref<unsigned(unsigned)> GetVF) {
for (int I : seq<int>(0, NumParts)) {
@@ -5440,9 +5441,9 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE,
if (VF == 0)
continue;
unsigned Limit = getNumElems(CurrentOrder.size(), PartSz, I);
- MutableArrayRef<unsigned> Slice = CurrentOrder.slice(I * PartSz, Limit);
+ MutableArrayRef<uint64_t> Slice = CurrentOrder.slice(I * PartSz, Limit);
// Shuffle of at least 2 vectors - ignore.
- if (any_of(Slice, [&](int I) { return I != NumScalars; })) {
+ if (any_of(Slice, [&](uint64_t I) { return I != NumScalars; })) {
std::fill(Slice.begin(), Slice.end(), NumScalars);
ShuffledSubMasks.set(I);
continue;
@@ -5540,8 +5541,8 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE,
return std::max(Entries[I].front()->getVectorFactor(),
Entries[I].back()->getVectorFactor());
});
- int NumUndefs =
- count_if(CurrentOrder, [&](int Idx) { return Idx == NumScalars; });
+ unsigned NumUndefs =
+ count_if(CurrentOrder, [&](uint64_t Idx) { return Idx == NumScalars; });
if (ShuffledSubMasks.all() || (NumScalars > 2 && NumUndefs >= NumScalars / 2))
return std::nullopt;
return std::move(CurrentOrder);
@@ -5574,7 +5575,7 @@ static Align computeCommonAlignment(ArrayRef<Value *> VL) {
}
/// Check if \p Order represents reverse order.
-static bool isReverseOrder(ArrayRef<unsigned> Order) {
+static bool isReverseOrder(ArrayRef<uint64_t> Order) {
assert(!Order.empty() &&
"Order is empty. Please check it before using isReverseOrder.");
unsigned Sz = Order.size();
@@ -5593,7 +5594,7 @@ static bool isReverseOrder(ArrayRef<unsigned> Order) {
static std::optional<Value *>
calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
const DataLayout &DL, ScalarEvolution &SE,
- SmallVectorImpl<unsigned> &SortedIndices,
+ SmallVectorImpl<uint64_t> &SortedIndices,
Instruction *Inst = nullptr) {
SmallVector<const SCEV *> SCEVs;
const SCEV *PtrSCEVLowest = nullptr;
@@ -5856,7 +5857,7 @@ static Value *createExtractVector(IRBuilderBase &Builder, Value *Vec,
/// with \p Order.
/// \return true if the mask represents strided access, false - otherwise.
static bool buildCompressMask(ArrayRef<Value *> PointerOps,
- ArrayRef<unsigned> Order, Type *ScalarTy,
+ ArrayRef<uint64_t> Order, Type *ScalarTy,
const DataLayout &DL, ScalarEvolution &SE,
SmallVectorImpl<int> &CompressMask) {
const unsigned Sz = PointerOps.size();
@@ -5868,7 +5869,11 @@ static bool buildCompressMask(ArrayRef<Value *> PointerOps,
Value *Ptr0 = Order.empty() ? PointerOps.front() : PointerOps[Order.front()];
for (unsigned I : seq<unsigned>(1, Sz)) {
Value *Ptr = Order.empty() ? PointerOps[I] : PointerOps[Order[I]];
- unsigned Pos = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+ std::optional<int64_t> OptPos =
+ getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+ if (!OptPos || OptPos > std::numeric_limits<unsigned>::max())
+ return false;
+ unsigned Pos = static_cast<unsigned>(*OptPos);
CompressMask[I] = Pos;
if (!Stride)
continue;
@@ -5886,7 +5891,7 @@ static bool buildCompressMask(ArrayRef<Value *> PointerOps,
/// (masked) interleaved load.
static bool isMaskedLoadCompress(
ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
- ArrayRef<unsigned> Order, const TargetTransformInfo &TTI,
+ ArrayRef<uint64_t> Order, const TargetTransformInfo &TTI,
const DataLayout &DL, ScalarEvolution &SE, AssumptionCache &AC,
const DominatorTree &DT, const TargetLibraryInfo &TLI,
const function_ref<bool(Value *)> AreAllUsersVectorized, bool &IsMasked,
@@ -5894,7 +5899,7 @@ static bool isMaskedLoadCompress(
VectorType *&LoadVecTy) {
InterleaveFactor = 0;
Type *ScalarTy = VL.front()->getType();
- const unsigned Sz = VL.size();
+ const uint64_t Sz = VL.size();
auto *VecTy = getWidenedType(ScalarTy, Sz);
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
SmallVector<int> Mask;
@@ -5921,11 +5926,11 @@ static bool isMaskedLoadCompress(
Ptr0 = PointerOps[Order.front()];
PtrN = PointerOps[Order.back()];
}
- std::optional<int> Diff =
+ std::optional<int64_t> Diff =
getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE);
if (!Diff)
return false;
- const unsigned MaxRegSize =
+ const uint64_t MaxRegSize =
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
.getFixedValue();
// Check for very large distances between elements.
@@ -6020,7 +6025,7 @@ static bool isMaskedLoadCompress(
/// (masked) interleaved load.
static bool
isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
- ArrayRef<unsigned> Order, const TargetTransformInfo &TTI,
+ ArrayRef<uint64_t> Order, const TargetTransformInfo &TTI,
const DataLayout &DL, ScalarEvolution &SE,
AssumptionCache &AC, const DominatorTree &DT,
const TargetLibraryInfo &TLI,
@@ -6048,12 +6053,13 @@ isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
/// current graph (for masked gathers extra extractelement instructions
/// might be required).
static bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
- ArrayRef<unsigned> Order,
+ ArrayRef<uint64_t> Order,
const TargetTransformInfo &TTI, const DataLayout &DL,
ScalarEvolution &SE,
- const bool IsAnyPointerUsedOutGraph, const int Diff) {
- const unsigned Sz = VL.size();
- const unsigned AbsoluteDiff = std::abs(Diff);
+ const bool IsAnyPointerUsedOutGraph,
+ const int64_t Diff) {
+ const uint64_t Sz = VL.size();
+ const uint64_t AbsoluteDiff = std::abs(Diff);
Type *ScalarTy = VL.front()->getType();
auto *VecTy = getWidenedType(ScalarTy, Sz);
if (IsA...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/139941