[llvm] 45c4673 - [LAA] Pass access type to getPtrStride()
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 11 10:17:13 PDT 2021
Author: Nikita Popov
Date: 2021-09-11T19:16:49+02:00
New Revision: 45c467346a79ec3df083a29d93184ce1f09c4083
URL: https://github.com/llvm/llvm-project/commit/45c467346a79ec3df083a29d93184ce1f09c4083
DIFF: https://github.com/llvm/llvm-project/commit/45c467346a79ec3df083a29d93184ce1f09c4083.diff
LOG: [LAA] Pass access type to getPtrStride()
Pass the access type to getPtrStride() so that it is no longer determined
from the pointer element type. Many callers still fetch the element type
at a higher level, though, so this only partially addresses the issue.
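For illustration, the shape of the call-site migration the hunks below
perform, as an LLVM API fragment rather than a standalone program (PSE,
Ptr, TheLoop and Strides are assumed in scope, as at the existing call
sites):

  // Before: the stride's units were inferred from Ptr's pointee type.
  int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides);

  // After: the caller names the type it actually loads or stores; for a
  // load or store instruction I, getLoadStoreType(&I) yields that type.
  Type *AccessTy = getLoadStoreType(&I);
  int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides);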
Added:
Modified:
llvm/include/llvm/Analysis/LoopAccessAnalysis.h
llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
llvm/lib/Analysis/LoopAccessAnalysis.cpp
llvm/lib/Analysis/VectorUtils.cpp
llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 81132edb9e2b..ee951bf2c1b0 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -670,8 +670,8 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
const ValueToValueMap &PtrToStride,
Value *Ptr);
-/// If the pointer has a constant stride return it in units of its
-/// element size. Otherwise return zero.
+/// If the pointer has a constant stride return it in units of the access type
+/// size. Otherwise return zero.
///
/// Ensure that it does not wrap in the address space, assuming the predicate
/// associated with \p PSE is true.
@@ -680,7 +680,8 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
/// to \p PtrToStride and therefore add further predicates to \p PSE.
/// The \p Assume parameter indicates if we are allowed to make additional
/// run-time assumptions.
-int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
+int64_t getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
+ const Loop *Lp,
const ValueToValueMap &StridesMap = ValueToValueMap(),
bool Assume = false, bool ShouldCheckWrap = true);
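The updated doc comment says the stride is returned "in units of the
access type size". A minimal standalone model of that unit conversion,
mirroring the getTypeAllocSize() division visible in the
LoopAccessAnalysis.cpp hunk below (hypothetical helper name, not LLVM
code):

  #include <cassert>
  #include <cstdint>

  // Byte step of the pointer per iteration, divided by the access type's
  // alloc size; a step that is not a whole multiple of the size has no
  // constant stride in these units, which the real code signals with 0.
  int64_t strideInAccessUnits(int64_t ByteStep, int64_t AccessTySize) {
    if (ByteStep % AccessTySize != 0)
      return 0;
    return ByteStep / AccessTySize;
  }

  int main() {
    assert(strideInAccessUnits(4, 4) == 1); // i32 accesses, 4-byte step: unit stride
    assert(strideInAccessUnits(4, 2) == 2); // the same step seen as i16 accesses
    assert(strideInAccessUnits(6, 4) == 0); // not a whole number of elements
  }

This is also why passing the access type matters: the same pointer step
yields a different stride depending on the type being accessed.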
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index e7dcdda8af89..ed9e0beb0339 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -340,7 +340,7 @@ class LoopVectorizationLegality {
/// -1 - Address is consecutive, and decreasing.
/// NOTE: This method must only be used before modifying the original scalar
/// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
- int isConsecutivePtr(Value *Ptr) const;
+ int isConsecutivePtr(Type *AccessTy, Value *Ptr) const;
/// Returns true if the value V is uniform within the loop.
bool isUniform(Value *V);
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 6b1c493495bb..7856128b4f9c 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -658,7 +658,8 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE,
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;
- int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
+ Type *AccessTy = Ptr->getType()->getPointerElementType();
+ int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
return true;
@@ -1025,15 +1026,17 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
}
/// Check whether the access through \p Ptr has a constant stride.
-int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
- const Loop *Lp, const ValueToValueMap &StridesMap,
- bool Assume, bool ShouldCheckWrap) {
+int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
+ Value *Ptr, const Loop *Lp,
+ const ValueToValueMap &StridesMap, bool Assume,
+ bool ShouldCheckWrap) {
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
+ unsigned AddrSpace = Ty->getPointerAddressSpace();
- // Make sure that the pointer does not point to aggregate types.
- auto *PtrTy = cast<PointerType>(Ty);
- if (PtrTy->getElementType()->isAggregateType()) {
+ // Make sure we're not accessing an aggregate type.
+ // TODO: Why? This doesn't make any sense.
+ if (AccessTy->isAggregateType()) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
<< *Ptr << "\n");
return 0;
@@ -1070,8 +1073,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
isNoWrapAddRec(Ptr, AR, PSE, Lp);
if (!IsNoWrapAddRec && !IsInBoundsGEP &&
- NullPointerIsDefined(Lp->getHeader()->getParent(),
- PtrTy->getAddressSpace())) {
+ NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace)) {
if (Assume) {
PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
IsNoWrapAddRec = true;
@@ -1099,7 +1101,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
}
auto &DL = Lp->getHeader()->getModule()->getDataLayout();
- int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+ int64_t Size = DL.getTypeAllocSize(AccessTy);
const APInt &APStepVal = C->getAPInt();
// Huge step value - give up.
@@ -1119,7 +1121,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
// zero we know that this won't happen without triggering undefined behavior.
if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
(IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
- PtrTy->getAddressSpace()))) {
+ AddrSpace))) {
if (Assume) {
// We can avoid this case by adding a run-time check.
LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
@@ -1477,6 +1479,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
Value *BPtr = B.getPointer();
bool AIsWrite = A.getInt();
bool BIsWrite = B.getInt();
+ Type *ATy = APtr->getType()->getPointerElementType();
+ Type *BTy = BPtr->getType()->getPointerElementType();
// Two reads are independent.
if (!AIsWrite && !BIsWrite)
@@ -1487,8 +1491,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
BPtr->getType()->getPointerAddressSpace())
return Dependence::Unknown;
- int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
- int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
+ int64_t StrideAPtr =
+ getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true);
+ int64_t StrideBPtr =
+ getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true);
const SCEV *Src = PSE.getSCEV(APtr);
const SCEV *Sink = PSE.getSCEV(BPtr);
@@ -1497,6 +1503,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// dependence.
if (StrideAPtr < 0) {
std::swap(APtr, BPtr);
+ std::swap(ATy, BTy);
std::swap(Src, Sink);
std::swap(AIsWrite, BIsWrite);
std::swap(AIdx, BIdx);
@@ -1518,8 +1525,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return Dependence::Unknown;
}
- Type *ATy = APtr->getType()->getPointerElementType();
- Type *BTy = BPtr->getType()->getPointerElementType();
auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
uint64_t Stride = std::abs(StrideAPtr);
@@ -1981,7 +1986,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// words may be written to the same address.
bool IsReadOnlyPtr = false;
if (Seen.insert(Ptr).second ||
- !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
+ !getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
++NumReads;
IsReadOnlyPtr = true;
}
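One non-mechanical detail in the isDependent() hunks above: ATy and BTy
are now captured before the possible operand swap, so a
std::swap(ATy, BTy) is added to keep each type paired with its pointer.
A toy standalone illustration of that invariant (hypothetical, not LLVM
code):

  #include <cassert>
  #include <string>
  #include <utility>

  int main() {
    std::string APtr = "a", BPtr = "b";
    std::string ATy = "type-of-a", BTy = "type-of-b";
    bool SwapOperands = true; // stands in for StrideAPtr < 0
    if (SwapOperands) {
      std::swap(APtr, BPtr);
      std::swap(ATy, BTy); // without this, ATy would describe BPtr's access
    }
    assert(APtr == "b" && ATy == "type-of-b"); // pairing preserved
  }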
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 2104f04c0d0f..8bb5861eedc5 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -986,7 +986,7 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
// wrap around the address space we would do a memory access at nullptr
// even without the transformation. The wrapping checks are therefore
// deferred until after we've formed the interleaved groups.
- int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
+ int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, Strides,
/*Assume=*/true, /*ShouldCheckWrap=*/false);
const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
@@ -1205,8 +1205,9 @@ void InterleavedAccessInfo::analyzeInterleaving(
Instruction *Member = Group->getMember(Index);
assert(Member && "Group member does not exist");
Value *MemberPtr = getLoadStorePointerOperand(Member);
- if (getPtrStride(PSE, MemberPtr, TheLoop, Strides, /*Assume=*/false,
- /*ShouldCheckWrap=*/true))
+ Type *AccessTy = getLoadStoreType(Member);
+ if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
+ /*Assume=*/false, /*ShouldCheckWrap=*/true))
return false;
LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
<< FirstOrLast
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index fb834c01301b..044a29cf84d6 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -2060,8 +2060,9 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
return false;
}
if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
- Value *Ptr = isa<LoadInst>(I) ? I.getOperand(0) : I.getOperand(1);
- int64_t NextStride = getPtrStride(PSE, Ptr, L);
+ Value *Ptr = getLoadStorePointerOperand(&I);
+ Type *AccessTy = getLoadStoreType(&I);
+ int64_t NextStride = getPtrStride(PSE, AccessTy, Ptr, L);
if (NextStride == 1) {
// TODO: for now only allow consecutive strides of 1. We could support
// other strides as long as it is uniform, but let's keep it simple
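The ARM hunk above also folds away some manual operand indexing: a
load's pointer is operand 0 and a store's is operand 1, which is exactly
the distinction getLoadStorePointerOperand() hides, with
getLoadStoreType() supplying the accessed type alongside it. As a
fragment, assuming an Instruction &I known to be a load or store:

  // Before: hand-rolled operand selection.
  Value *Ptr = isa<LoadInst>(I) ? I.getOperand(0) : I.getOperand(1);
  // After: the same pointer via the helper, plus the accessed type.
  Value *Ptr = getLoadStorePointerOperand(&I);
  Type *AccessTy = getLoadStoreType(&I);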
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 9c4f18f8e221..79dd120a9ab6 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -108,8 +108,8 @@ struct StoreToLoadForwardingCandidate {
// Currently we only support accesses with unit stride. FIXME: we should be
// able to handle non unit stirde as well as long as the stride is equal to
// the dependence distance.
- if (getPtrStride(PSE, LoadPtr, L) != 1 ||
- getPtrStride(PSE, StorePtr, L) != 1)
+ if (getPtrStride(PSE, LoadType, LoadPtr, L) != 1 ||
+ getPtrStride(PSE, LoadType, StorePtr, L) != 1)
return false;
auto &DL = Load->getParent()->getModule()->getDataLayout();
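For context on the unit-stride restriction in this candidate check: with
stride 1 on both accesses, the value stored in one iteration is reloaded
a fixed number of iterations later, which is what makes forwarding
through the loop feasible. A toy standalone illustration with a
dependence distance of one element (not LLVM code):

  #include <cassert>

  int main() {
    int A[8] = {0};
    // Models: for (i = 1; i < 8; i++) { A[i] = i; use(A[i - 1]); }
    for (int i = 1; i < 8; ++i) {
      A[i] = i;
      // The load always observes the store from the previous iteration,
      // so it could be forwarded through a PHI instead of reloaded.
      assert(A[i - 1] == i - 1);
    }
  }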
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 3c484fb0d28a..898aaec12b94 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -419,7 +419,8 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
return false;
}
-int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) const {
+int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
+ Value *Ptr) const {
const ValueToValueMap &Strides =
getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
@@ -428,7 +429,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) const {
llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
PGSOQueryType::IRPass);
bool CanAddPredicate = !OptForSize;
- int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
+ int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
+ CanAddPredicate, false);
if (Stride == 1 || Stride == -1)
return Stride;
return 0;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9d7354ed639c..2808cefc1d9a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1492,14 +1492,14 @@ class LoopVectorizationCostModel {
/// Returns true if the target machine supports masked store operation
/// for the given \p DataType and kind of access to \p Ptr.
bool isLegalMaskedStore(Type *DataType, Value *Ptr, Align Alignment) const {
- return Legal->isConsecutivePtr(Ptr) &&
+ return Legal->isConsecutivePtr(DataType, Ptr) &&
TTI.isLegalMaskedStore(DataType, Alignment);
}
/// Returns true if the target machine supports masked load operation
/// for the given \p DataType and kind of access to \p Ptr.
bool isLegalMaskedLoad(Type *DataType, Value *Ptr, Align Alignment) const {
- return Legal->isConsecutivePtr(Ptr) &&
+ return Legal->isConsecutivePtr(DataType, Ptr) &&
TTI.isLegalMaskedLoad(DataType, Alignment);
}
@@ -5334,9 +5334,10 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
assert((LI || SI) && "Invalid memory instruction");
auto *Ptr = getLoadStorePointerOperand(I);
+ auto *ScalarTy = getLoadStoreType(I);
// In order to be widened, the pointer should be consecutive, first of all.
- if (!Legal->isConsecutivePtr(Ptr))
+ if (!Legal->isConsecutivePtr(ScalarTy, Ptr))
return false;
// If the instruction is a store located in a predicated block, it will be
@@ -5347,7 +5348,6 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
// If the instruction's allocated size doesn't equal it's type size, it
// requires padding and will be scalarized.
auto &DL = I->getModule()->getDataLayout();
- auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
if (hasIrregularType(ScalarTy, DL))
return false;
@@ -7088,7 +7088,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
Value *Ptr = getLoadStorePointerOperand(I);
unsigned AS = getLoadStoreAddressSpace(I);
- int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+ int ConsecutiveStride = Legal->isConsecutivePtr(ValTy, Ptr);
enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
@@ -7474,8 +7474,8 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
// We assume that widening is the best solution when possible.
if (memoryInstructionCanBeWidened(&I, VF)) {
InstructionCost Cost = getConsecutiveMemOpCost(&I, VF);
- int ConsecutiveStride =
- Legal->isConsecutivePtr(getLoadStorePointerOperand(&I));
+ int ConsecutiveStride = Legal->isConsecutivePtr(
+ getLoadStoreType(&I), getLoadStorePointerOperand(&I));
assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
"Expected consecutive stride.");
InstWidening Decision =
@@ -7975,7 +7975,7 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
// Check if the pointer operand of a load or store instruction is
// consecutive.
if (auto *Ptr = getLoadStorePointerOperand(Inst))
- return Legal->isConsecutivePtr(Ptr);
+ return Legal->isConsecutivePtr(getLoadStoreType(Inst), Ptr);
return false;
}
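Taken together, the LoopVectorize.cpp changes converge on a single query
pattern; as a fragment of the updated API, assuming an Instruction *I
that is a load or store in the candidate loop:

  auto *Ptr = getLoadStorePointerOperand(I);
  Type *AccessTy = getLoadStoreType(I);
  // 1: consecutive and increasing, -1: consecutive and decreasing,
  // 0: neither (per the doc comment in LoopVectorizationLegality.h).
  int Stride = Legal->isConsecutivePtr(AccessTy, Ptr);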