[llvm] [LAA] Refine stride checks for SCEVs during dependence analysis. (PR #99577)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 26 04:11:55 PDT 2024
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/99577
From 1debdc19f914b699db8f1ab0225da6e1bc61583a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 18 Jul 2024 11:46:54 +0100
Subject: [PATCH 1/4] [LAA] Introduce helper to get SCEV for pointer stride.
(NFC)
---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 35 ++++++++++++++++--------
1 file changed, 24 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 84214c47a10e1..5b5d83093e84d 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1458,12 +1458,11 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
return false;
}
-/// Check whether the access through \p Ptr has a constant stride.
-std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
- Type *AccessTy, Value *Ptr,
- const Loop *Lp,
- const DenseMap<Value *, const SCEV *> &StridesMap,
- bool Assume, bool ShouldCheckWrap) {
+static std::optional<const SCEV *>
+getPtrStrideSCEV(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
+ const Loop *Lp,
+ const DenseMap<Value *, const SCEV *> &StridesMap, bool Assume,
+ bool ShouldCheckWrap) {
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
@@ -1520,13 +1519,14 @@ std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
if (Rem)
return std::nullopt;
+ const SCEV *StrideSCEV = PSE.getSE()->getConstant(C->getType(), Stride);
if (!ShouldCheckWrap)
- return Stride;
+ return StrideSCEV;
// The address calculation must not wrap. Otherwise, a dependence could be
// inverted.
if (isNoWrapAddRec(Ptr, AR, PSE, Lp))
- return Stride;
+ return StrideSCEV;
// An inbounds getelementptr that is a AddRec with a unit stride
// cannot wrap per definition. If it did, the result would be poison
@@ -1534,7 +1534,7 @@ std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
// when executed.
if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
GEP && GEP->isInBounds() && (Stride == 1 || Stride == -1))
- return Stride;
+ return StrideSCEV;
// If the null pointer is undefined, then a access sequence which would
// otherwise access it can be assumed not to unsigned wrap. Note that this
@@ -1542,7 +1542,7 @@ std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
unsigned AddrSpace = Ty->getPointerAddressSpace();
if (!NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace) &&
(Stride == 1 || Stride == -1))
- return Stride;
+ return StrideSCEV;
if (Assume) {
PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
@@ -1550,7 +1550,7 @@ std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
<< "LAA: Pointer: " << *Ptr << "\n"
<< "LAA: SCEV: " << *AR << "\n"
<< "LAA: Added an overflow assumption\n");
- return Stride;
+ return StrideSCEV;
}
LLVM_DEBUG(
dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
@@ -1558,6 +1558,19 @@ std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
return std::nullopt;
}
+/// Check whether the access through \p Ptr has a constant stride.
+std::optional<int64_t>
+llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
+ const Loop *Lp,
+ const DenseMap<Value *, const SCEV *> &StridesMap,
+ bool Assume, bool ShouldCheckWrap) {
+ std::optional<const SCEV *> StrideSCEV = getPtrStrideSCEV(
+ PSE, AccessTy, Ptr, Lp, StridesMap, Assume, ShouldCheckWrap);
+ if (StrideSCEV && isa<SCEVConstant>(*StrideSCEV))
+ return cast<SCEVConstant>(*StrideSCEV)->getAPInt().getSExtValue();
+ return std::nullopt;
+}
+
std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
Type *ElemTyB, Value *PtrB,
const DataLayout &DL,
From e6a864c51e98a0be0e33284619998867d51cfc31 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 18 Jul 2024 16:13:40 +0100
Subject: [PATCH 2/4] [LAA] Refine stride checks for SCEVs during dependence
analysis.
NOTE: This PR contains 1debdc19f914b699db8f1ab0225da6e1bc61583a, which
introduces a helper to get the stride as a SCEV expression and is NFC.
This can be submitted separately once we converge on the final version
of the PR.
Update getDependenceDistanceStrideAndSize to reason about different
combinations of strides directly and explicitly.
Instead of getting the constant strides for both source and sink of a
dependence, start by getting the SCEVs for their strides: the stride
SCEV if the pointer is a non-wrapping AddRec, or nullopt otherwise.
Then proceed by checking the strides.
If either source or sink is not strided (i.e. not a non-wrapping
AddRec), we check whether either of them is neither loop invariant nor
strided. In that case, the accesses may overlap with earlier or later
iterations and we cannot generate runtime checks to disambiguate them.
Otherwise both are either loop invariant or strided, and we can
generate a runtime check to disambiguate them.
If both are strided but either stride is not a constant, we currently
cannot analyze them further, but may be able to disambiguate them with
a runtime check. Reasoning about non-constant strides can be extended
in a follow-up.
If both are strided by constants, we proceed as before.
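To make that decision sequence concrete, here is a condensed C++ sketch,
paraphrased from the hunks below rather than copied verbatim; StrideAPtr
and StrideBPtr stand for the optional stride SCEVs
(std::optional<const SCEV *>) computed for source and sink:

  if (!StrideAPtr || !StrideBPtr) {
    bool SrcInvariant = SE.isLoopInvariant(Src, InnermostLoop);
    bool SinkInvariant = SE.isLoopInvariant(Sink, InnermostLoop);
    // Neither strided nor loop invariant: the access may overlap with any
    // earlier or later iteration and no runtime check can disambiguate it.
    if ((!StrideAPtr && !SrcInvariant) || (!StrideBPtr && !SinkInvariant))
      return MemoryDepChecker::Dependence::IndirectUnsafe;
    // Loop invariant or strided: a runtime check can disambiguate.
    return MemoryDepChecker::Dependence::Unknown;
  }
  // Strided, but not by known constants: fall back to runtime checks.
  if (!isa<SCEVConstant>(*StrideAPtr) || !isa<SCEVConstant>(*StrideBPtr))
    return MemoryDepChecker::Dependence::Unknown;
  // Both strides are constants: continue with the existing constant-stride
  // dependence distance analysis.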
This is an alternative to
https://github.com/llvm/llvm-project/pull/99239 and also replaces the
additional checks of whether the underlying objects are loop-invariant.
Fixes https://github.com/llvm/llvm-project/issues/87189.
---
.../llvm/Analysis/LoopAccessAnalysis.h | 19 +--
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 112 ++++++++++--------
.../load-store-index-loaded-in-loop.ll | 26 ++--
.../pointer-with-unknown-bounds.ll | 4 +-
.../LoopAccessAnalysis/print-order.ll | 6 +-
.../LoopAccessAnalysis/select-dependence.ll | 4 +-
.../LoopAccessAnalysis/symbolic-stride.ll | 4 +-
7 files changed, 89 insertions(+), 86 deletions(-)
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index afafb74bdcb0a..60d953ae18d75 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -199,9 +199,7 @@ class MemoryDepChecker {
/// Check whether the dependencies between the accesses are safe.
///
/// Only checks sets with elements in \p CheckDeps.
- bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
- const DenseMap<Value *, SmallVector<const Value *, 16>>
- &UnderlyingObjects);
+ bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps);
/// No memory dependence was encountered that would inhibit
/// vectorization.
@@ -351,11 +349,8 @@ class MemoryDepChecker {
/// element access it records this distance in \p MinDepDistBytes (if this
/// distance is smaller than any other distance encountered so far).
/// Otherwise, this function returns true signaling a possible dependence.
- Dependence::DepType
- isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
- unsigned BIdx,
- const DenseMap<Value *, SmallVector<const Value *, 16>>
- &UnderlyingObjects);
+ Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx,
+ const MemAccessInfo &B, unsigned BIdx);
/// Check whether the data dependence could prevent store-load
/// forwarding.
@@ -392,11 +387,9 @@ class MemoryDepChecker {
/// determined, or a struct containing (Distance, Stride, TypeSize, AIsWrite,
/// BIsWrite).
std::variant<Dependence::DepType, DepDistanceStrideAndSizeInfo>
- getDependenceDistanceStrideAndSize(
- const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B,
- Instruction *BInst,
- const DenseMap<Value *, SmallVector<const Value *, 16>>
- &UnderlyingObjects);
+ getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst,
+ const MemAccessInfo &B,
+ Instruction *BInst);
};
class RuntimePointerChecking;
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 5b5d83093e84d..cee5d353db5d2 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1912,23 +1912,11 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
return ScaledDist % Stride;
}
-/// Returns true if any of the underlying objects has a loop varying address,
-/// i.e. may change in \p L.
-static bool
-isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
- ScalarEvolution &SE, const Loop *L) {
- return any_of(UnderlyingObjects, [&SE, L](const Value *UO) {
- return !SE.isLoopInvariant(SE.getSCEV(const_cast<Value *>(UO)), L);
- });
-}
-
std::variant<MemoryDepChecker::Dependence::DepType,
MemoryDepChecker::DepDistanceStrideAndSizeInfo>
MemoryDepChecker::getDependenceDistanceStrideAndSize(
const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
- const AccessAnalysis::MemAccessInfo &B, Instruction *BInst,
- const DenseMap<Value *, SmallVector<const Value *, 16>>
- &UnderlyingObjects) {
+ const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) {
auto &DL = InnermostLoop->getHeader()->getDataLayout();
auto &SE = *PSE.getSE();
auto [APtr, AIsWrite] = A;
@@ -1946,12 +1934,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
BPtr->getType()->getPointerAddressSpace())
return MemoryDepChecker::Dependence::Unknown;
- int64_t StrideAPtr =
- getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true)
- .value_or(0);
- int64_t StrideBPtr =
- getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true)
- .value_or(0);
+ std::optional<const SCEV *> StrideAPtr = getPtrStrideSCEV(
+ PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true);
+ std::optional<const SCEV *> StrideBPtr = getPtrStrideSCEV(
+ PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true);
const SCEV *Src = PSE.getSCEV(APtr);
const SCEV *Sink = PSE.getSCEV(BPtr);
@@ -1959,26 +1945,19 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
// If the induction step is negative we have to invert source and sink of the
// dependence when measuring the distance between them. We should not swap
// AIsWrite with BIsWrite, as their uses expect them in program order.
- if (StrideAPtr < 0) {
+ if (StrideAPtr && SE.isKnownNegative(*StrideAPtr)) {
std::swap(Src, Sink);
std::swap(AInst, BInst);
+ std::swap(StrideAPtr, StrideBPtr);
}
const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
- << "(Induction step: " << StrideAPtr << ")\n");
+ << "\n");
LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
<< ": " << *Dist << "\n");
- // Needs accesses where the addresses of the accessed underlying objects do
- // not change within the loop.
- if (isLoopVariantIndirectAddress(UnderlyingObjects.find(APtr)->second, SE,
- InnermostLoop) ||
- isLoopVariantIndirectAddress(UnderlyingObjects.find(BPtr)->second, SE,
- InnermostLoop))
- return MemoryDepChecker::Dependence::IndirectUnsafe;
-
// Check if we can prove that Sink only accesses memory after Src's end or
// vice versa. At the moment this is limited to cases where either source or
// sink are loop invariant to avoid compile-time increases. This is not
@@ -2000,11 +1979,47 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
}
}
- // Need accesses with constant strides and the same direction. We don't want
- // to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that
- // could wrap in the address space.
- if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) ||
- (StrideAPtr < 0 && StrideBPtr > 0)) {
+ // Need accesses with constant strides and the same direction for further
+ // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
+ // similar code or pointer arithmetic that could wrap in the address space.
+ //
+ // If Src or Sink are non-wrapping AddRecs, StrideAPtr and StrideBPtr contain
+ // a SCEV representing the stride of the SCEV. It may not be a known constant
+ // value though.
+
+ // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec), we
+ // cannot analyze the dependence further.
+ if (!StrideAPtr || !StrideBPtr) {
+ bool SrcInvariant = SE.isLoopInvariant(Src, InnermostLoop);
+ bool SinkInvariant = SE.isLoopInvariant(Sink, InnermostLoop);
+ // If either Src or Sink are not loop invariant and not strided, the
+ // expression in the current iteration may overlap with any earlier or later
+ // iteration. This is not safe and we also cannot generate runtime checks to
+ // ensure safety. This includes expressions where an index is loaded in each
+ // iteration or wrapping AddRecs.
+ if ((!SrcInvariant && !StrideAPtr) || (!SinkInvariant && !StrideBPtr))
+ return MemoryDepChecker::Dependence::IndirectUnsafe;
+
+ // Otherwise both Src or Sink are either loop invariant or strided and we
+ // can generate a runtime check to disambiguate the accesses.
+ return MemoryDepChecker::Dependence::Unknown;
+ }
+
+ LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << **StrideAPtr
+ << " Sink induction step: " << **StrideBPtr << "\n");
+ // If either Src or Sink have a non-constant stride, we can generate a runtime
+ // check to disambiguate them.
+ if ((!isa<SCEVConstant>(*StrideAPtr)) || (!isa<SCEVConstant>(*StrideBPtr)))
+ return MemoryDepChecker::Dependence::Unknown;
+
+ // Both Src and Sink have a constant stride, check if they are in the same
+ // direction.
+ int64_t StrideAPtrInt =
+ cast<SCEVConstant>(*StrideAPtr)->getAPInt().getSExtValue();
+ int64_t StrideBPtrInt =
+ cast<SCEVConstant>(*StrideBPtr)->getAPInt().getSExtValue();
+ if ((StrideAPtrInt > 0 && StrideBPtrInt < 0) ||
+ (StrideAPtrInt < 0 && StrideBPtrInt > 0)) {
LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
return MemoryDepChecker::Dependence::Unknown;
}
@@ -2014,22 +2029,20 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
if (!HasSameSize)
TypeByteSize = 0;
- return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr),
- std::abs(StrideBPtr), TypeByteSize,
+ return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtrInt),
+ std::abs(StrideBPtrInt), TypeByteSize,
AIsWrite, BIsWrite);
}
-MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
- const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
- unsigned BIdx,
- const DenseMap<Value *, SmallVector<const Value *, 16>>
- &UnderlyingObjects) {
+MemoryDepChecker::Dependence::DepType
+MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
+ const MemAccessInfo &B, unsigned BIdx) {
assert(AIdx < BIdx && "Must pass arguments in program order");
// Get the dependence distance, stride, type size and what access writes for
// the dependence between A and B.
- auto Res = getDependenceDistanceStrideAndSize(
- A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects);
+ auto Res =
+ getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
if (std::holds_alternative<Dependence::DepType>(Res))
return std::get<Dependence::DepType>(Res);
@@ -2263,10 +2276,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
return Dependence::BackwardVectorizable;
}
-bool MemoryDepChecker::areDepsSafe(
- DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
- const DenseMap<Value *, SmallVector<const Value *, 16>>
- &UnderlyingObjects) {
+bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
+ MemAccessInfoList &CheckDeps) {
MinDepDistBytes = -1;
SmallPtrSet<MemAccessInfo, 8> Visited;
@@ -2309,8 +2320,8 @@ bool MemoryDepChecker::areDepsSafe(
if (*I1 > *I2)
std::swap(A, B);
- Dependence::DepType Type = isDependent(*A.first, A.second, *B.first,
- B.second, UnderlyingObjects);
+ Dependence::DepType Type =
+ isDependent(*A.first, A.second, *B.first, B.second);
mergeInStatus(Dependence::isSafeForVectorization(Type));
// Gather dependences unless we accumulated MaxDependences
@@ -2665,8 +2676,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (Accesses.isDependencyCheckNeeded()) {
LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses,
- Accesses.getDependenciesToCheck(),
- Accesses.getUnderlyingObjects());
+ Accesses.getDependenciesToCheck());
if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) {
LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/load-store-index-loaded-in-loop.ll b/llvm/test/Analysis/LoopAccessAnalysis/load-store-index-loaded-in-loop.ll
index 2e61a28039846..6d8e296ec72fa 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/load-store-index-loaded-in-loop.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/load-store-index-loaded-in-loop.ll
@@ -9,21 +9,19 @@
define void @B_indices_loaded_in_loop_A_stored(ptr %A, ptr noalias %B, i64 %N, i64 %off) {
; CHECK-LABEL: 'B_indices_loaded_in_loop_A_stored'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: %l = load i32, ptr %gep.B, align 4 ->
+; CHECK-NEXT: store i32 %inc, ptr %gep.B, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Unknown:
+; CHECK-NEXT: %indices = load i8, ptr %gep.A, align 1 ->
+; CHECK-NEXT: store i32 %l, ptr %gep.C, align 4
+; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
-; CHECK-NEXT: Check 0:
-; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]):
-; CHECK-NEXT: %gep.C = getelementptr inbounds i32, ptr %A, i64 %iv
-; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]):
-; CHECK-NEXT: %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv.off
; CHECK-NEXT: Grouped accesses:
-; CHECK-NEXT: Group [[GRP1]]:
-; CHECK-NEXT: (Low: %A High: ((4 * %N) + %A))
-; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop>
-; CHECK-NEXT: Group [[GRP2]]:
-; CHECK-NEXT: (Low: (%off + %A) High: (%N + %off + %A))
-; CHECK-NEXT: Member: {(%off + %A),+,1}<nw><%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
@@ -59,9 +57,9 @@ define void @B_indices_loaded_in_loop_A_not_stored(ptr %A, ptr noalias %B, i64 %
; CHECK-LABEL: 'B_indices_loaded_in_loop_A_not_stored'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: Unknown:
+; CHECK-NEXT: IndirectUnsafe:
; CHECK-NEXT: %l = load i32, ptr %gep.B, align 4 ->
; CHECK-NEXT: store i32 %inc, ptr %gep.B, align 4
; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll b/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll
index 546a75cf4efd5..28ee6c6f0a89a 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll
@@ -13,9 +13,9 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
; CHECK-NOT: Report: cannot identify array bounds
-; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: Unknown:
+; CHECK-NEXT: IndirectUnsafe:
; CHECK-NEXT: %loadA = load i16, ptr %arrayidxA, align 2 ->
; CHECK-NEXT: store i16 %mul, ptr %arrayidxA, align 2
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll b/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
index 18e45f469b4a3..8ca30383092c6 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
@@ -9,8 +9,9 @@
; CHECK-LABEL: 'negative_step'
; CHECK: LAA: Found an analyzable loop: loop
; CHECK: LAA: Checking memory dependencies
-; CHECK-NEXT: LAA: Src Scev: {(4092 + %A),+,-4}<nw><%loop>Sink Scev: {(4088 + %A)<nuw>,+,-4}<nw><%loop>(Induction step: -1)
+; CHECK-NEXT: LAA: Src Scev: {(4092 + %A),+,-4}<nw><%loop>Sink Scev: {(4088 + %A)<nuw>,+,-4}<nw><%loop>
; CHECK-NEXT: LAA: Distance for store i32 %add, ptr %gep.A.plus.1, align 4 to %l = load i32, ptr %gep.A, align 4: -4
+; CHECK-NEXT: LAA: Src induction step: -1 Sink induction step: -1
; CHECK-NEXT: LAA: Dependence is negative
define void @negative_step(ptr nocapture %A) {
@@ -41,8 +42,9 @@ exit:
; CHECK-LABEL: 'positive_step'
; CHECK: LAA: Found an analyzable loop: loop
; CHECK: LAA: Checking memory dependencies
-; CHECK-NEXT: LAA: Src Scev: {(4 + %A)<nuw>,+,4}<nuw><%loop>Sink Scev: {%A,+,4}<nw><%loop>(Induction step: 1)
+; CHECK-NEXT: LAA: Src Scev: {(4 + %A)<nuw>,+,4}<nuw><%loop>Sink Scev: {%A,+,4}<nw><%loop>
; CHECK-NEXT: LAA: Distance for %l = load i32, ptr %gep.A, align 4 to store i32 %add, ptr %gep.A.minus.1, align 4: -4
+; CHECK-NEXT: LAA: Src induction step: 1 Sink induction step: 1
; CHECK-NEXT: LAA: Dependence is negative
define void @positive_step(ptr nocapture %A) {
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/select-dependence.ll b/llvm/test/Analysis/LoopAccessAnalysis/select-dependence.ll
index 60fe8b4fcbed4..8bef7583c35c0 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/select-dependence.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/select-dependence.ll
@@ -5,9 +5,9 @@ define void @test(ptr noalias %x, ptr noalias %y, ptr noalias %z) {
; CHECK-LABEL: 'test'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: Unknown:
+; CHECK-NEXT: IndirectUnsafe:
; CHECK-NEXT: %load = load double, ptr %gep.sel, align 8 ->
; CHECK-NEXT: store double %load, ptr %gep.sel2, align 8
; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
index 7c1b11e22aef2..f0aed2421a96e 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
@@ -276,9 +276,9 @@ define void @single_stride_used_for_trip_count(ptr noalias %A, ptr noalias %B, i
; CHECK-LABEL: 'single_stride_used_for_trip_count'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: Unknown:
+; CHECK-NEXT: IndirectUnsafe:
; CHECK-NEXT: %load = load i32, ptr %gep.A, align 4 ->
; CHECK-NEXT: store i32 %add, ptr %gep.A.next, align 4
; CHECK-EMPTY:
From cdbf2caf669dd33c21a5398956b2f9a06ddee91b Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 25 Jul 2024 16:57:01 +0100
Subject: [PATCH 3/4] Fix
---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 31 ++++++++++++------------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 91685f1c4936f..a0cda6656e6a8 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1464,8 +1464,13 @@ getPtrStrideSCEV(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
const Loop *Lp,
const DenseMap<Value *, const SCEV *> &StridesMap, bool Assume,
bool ShouldCheckWrap) {
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
+ auto &DL = Lp->getHeader()->getDataLayout();
+ auto *IdxTy = DL.getIndexType(Ty);
+ if (PSE.getSE()->isLoopInvariant(PtrScev, Lp))
+ return PSE.getSE()->getZero(IdxTy);
if (isa<ScalableVectorType>(AccessTy)) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
@@ -1473,7 +1478,6 @@ getPtrStrideSCEV(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
return std::nullopt;
}
- const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (Assume && !AR)
@@ -1503,7 +1507,6 @@ getPtrStrideSCEV(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
return std::nullopt;
}
- auto &DL = Lp->getHeader()->getDataLayout();
TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
int64_t Size = AllocSize.getFixedValue();
const APInt &APStepVal = C->getAPInt();
@@ -1935,9 +1938,9 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
BPtr->getType()->getPointerAddressSpace())
return MemoryDepChecker::Dependence::Unknown;
- std::optional<const SCEV *> StrideAPtr = getPtrStrideSCEV(
+ std::optional<int64_t> StrideAPtr = getPtrStride(
PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true);
- std::optional<const SCEV *> StrideBPtr = getPtrStrideSCEV(
+ std::optional<int64_t> StrideBPtr = getPtrStride(
PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true);
const SCEV *Src = PSE.getSCEV(APtr);
@@ -1946,7 +1949,7 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
// If the induction step is negative we have to invert source and sink of the
// dependence when measuring the distance between them. We should not swap
// AIsWrite with BIsWrite, as their uses expect them in program order.
- if (StrideAPtr && SE.isKnownNegative(*StrideAPtr)) {
+ if (StrideAPtr && *StrideAPtr < 0) {
std::swap(Src, Sink);
std::swap(AInst, BInst);
std::swap(StrideAPtr, StrideBPtr);
@@ -1991,6 +1994,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
// If either Src or Sink are not strided (i.e. not a non-wrapping AddRec), we
// cannot analyze the dependence further.
if (!StrideAPtr || !StrideBPtr) {
+ return MemoryDepChecker::Dependence::IndirectUnsafe;
+ }
+
+ if (*StrideAPtr == 0 || StrideBPtr == 0) {
bool SrcInvariant = SE.isLoopInvariant(Src, InnermostLoop);
bool SinkInvariant = SE.isLoopInvariant(Sink, InnermostLoop);
// If either Src or Sink are not loop invariant and not strided, the
@@ -1998,27 +2005,21 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
// iteration. This is not safe and we also cannot generate runtime checks to
// ensure safety. This includes expressions where an index is loaded in each
// iteration or wrapping AddRecs.
- if ((!SrcInvariant && !StrideAPtr) || (!SinkInvariant && !StrideBPtr))
- return MemoryDepChecker::Dependence::IndirectUnsafe;
// Otherwise both Src or Sink are either loop invariant or strided and we
// can generate a runtime check to disambiguate the accesses.
return MemoryDepChecker::Dependence::Unknown;
}
- LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << **StrideAPtr
- << " Sink induction step: " << **StrideBPtr << "\n");
- // If either Src or Sink have a non-constant stride, we can generate a runtime
- // check to disambiguate them.
- if ((!isa<SCEVConstant>(*StrideAPtr)) || (!isa<SCEVConstant>(*StrideBPtr)))
- return MemoryDepChecker::Dependence::Unknown;
+ LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << *StrideAPtr
+ << " Sink induction step: " << *StrideBPtr << "\n");
// Both Src and Sink have a constant stride, check if they are in the same
// direction.
int64_t StrideAPtrInt =
- cast<SCEVConstant>(*StrideAPtr)->getAPInt().getSExtValue();
+ *StrideAPtr;
int64_t StrideBPtrInt =
- cast<SCEVConstant>(*StrideBPtr)->getAPInt().getSExtValue();
+ *StrideBPtr;
if ((StrideAPtrInt > 0 && StrideBPtrInt < 0) ||
(StrideAPtrInt < 0 && StrideBPtrInt > 0)) {
LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
From 742547f378a27a5facb3117b2504b97783be7b6e Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 25 Jul 2024 17:34:50 +0100
Subject: [PATCH 4/4] !fixup simplify
---
.../llvm/Analysis/LoopAccessAnalysis.h | 3 +-
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 88 ++++++-------------
2 files changed, 30 insertions(+), 61 deletions(-)
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 60d953ae18d75..fe903635b0930 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -790,7 +790,8 @@ replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
Value *Ptr);
/// If the pointer has a constant stride return it in units of the access type
-/// size. Otherwise return std::nullopt.
+/// size. If the pointer is loop-invariant, return 0. Otherwise return
+/// std::nullopt.
///
/// Ensure that it does not wrap in the address space, assuming the predicate
/// associated with \p PSE is true.
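As a reading aid for the updated contract, a small hedged sketch of how a
caller could interpret the return values (hypothetical caller code, not part
of the patch; TheLoop and NoSymbolicStrides are placeholder names, and the
empty strides map just means no symbolic strides are being versioned):

  const DenseMap<Value *, const SCEV *> NoSymbolicStrides;
  std::optional<int64_t> Stride =
      getPtrStride(PSE, AccessTy, Ptr, TheLoop, NoSymbolicStrides,
                   /*Assume=*/false, /*ShouldCheckWrap=*/true);
  if (!Stride) {
    // Neither loop-invariant nor a non-wrapping AddRec with a constant stride.
  } else if (*Stride == 0) {
    // Pointer is loop-invariant (the new case added by this patch).
  } else {
    // Constant stride in units of the access type size; the sign gives the
    // direction of the access pattern.
  }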
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index a0cda6656e6a8..b11fbe8898b66 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -728,11 +728,6 @@ class AccessAnalysis {
MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
- const DenseMap<Value *, SmallVector<const Value *, 16>> &
- getUnderlyingObjects() {
- return UnderlyingObjects;
- }
-
private:
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
@@ -1459,19 +1454,18 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
return false;
}
-static std::optional<const SCEV *>
-getPtrStrideSCEV(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
- const Loop *Lp,
- const DenseMap<Value *, const SCEV *> &StridesMap, bool Assume,
- bool ShouldCheckWrap) {
+/// Check whether the access through \p Ptr has a constant stride.
+std::optional<int64_t>
+llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
+ const Loop *Lp,
+ const DenseMap<Value *, const SCEV *> &StridesMap,
+ bool Assume, bool ShouldCheckWrap) {
const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
- Type *Ty = Ptr->getType();
- assert(Ty->isPointerTy() && "Unexpected non-ptr");
- auto &DL = Lp->getHeader()->getDataLayout();
- auto *IdxTy = DL.getIndexType(Ty);
if (PSE.getSE()->isLoopInvariant(PtrScev, Lp))
- return PSE.getSE()->getZero(IdxTy);
+ return {0};
+ Type *Ty = Ptr->getType();
+ assert(Ty->isPointerTy() && "Unexpected non-ptr");
if (isa<ScalableVectorType>(AccessTy)) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
<< "\n");
@@ -1507,6 +1501,7 @@ getPtrStrideSCEV(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
return std::nullopt;
}
+ auto &DL = Lp->getHeader()->getDataLayout();
TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
int64_t Size = AllocSize.getFixedValue();
const APInt &APStepVal = C->getAPInt();
@@ -1523,14 +1518,13 @@ getPtrStrideSCEV(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
if (Rem)
return std::nullopt;
- const SCEV *StrideSCEV = PSE.getSE()->getConstant(C->getType(), Stride);
if (!ShouldCheckWrap)
- return StrideSCEV;
+ return Stride;
// The address calculation must not wrap. Otherwise, a dependence could be
// inverted.
if (isNoWrapAddRec(Ptr, AR, PSE, Lp))
- return StrideSCEV;
+ return Stride;
// An inbounds getelementptr that is a AddRec with a unit stride
// cannot wrap per definition. If it did, the result would be poison
@@ -1538,7 +1532,7 @@ getPtrStrideSCEV(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
// when executed.
if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
GEP && GEP->isInBounds() && (Stride == 1 || Stride == -1))
- return StrideSCEV;
+ return Stride;
// If the null pointer is undefined, then a access sequence which would
// otherwise access it can be assumed not to unsigned wrap. Note that this
@@ -1546,7 +1540,7 @@ getPtrStrideSCEV(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
unsigned AddrSpace = Ty->getPointerAddressSpace();
if (!NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace) &&
(Stride == 1 || Stride == -1))
- return StrideSCEV;
+ return Stride;
if (Assume) {
PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
@@ -1554,7 +1548,7 @@ getPtrStrideSCEV(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
<< "LAA: Pointer: " << *Ptr << "\n"
<< "LAA: SCEV: " << *AR << "\n"
<< "LAA: Added an overflow assumption\n");
- return StrideSCEV;
+ return Stride;
}
LLVM_DEBUG(
dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
@@ -1562,19 +1556,6 @@ getPtrStrideSCEV(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
return std::nullopt;
}
-/// Check whether the access through \p Ptr has a constant stride.
-std::optional<int64_t>
-llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
- const Loop *Lp,
- const DenseMap<Value *, const SCEV *> &StridesMap,
- bool Assume, bool ShouldCheckWrap) {
- std::optional<const SCEV *> StrideSCEV = getPtrStrideSCEV(
- PSE, AccessTy, Ptr, Lp, StridesMap, Assume, ShouldCheckWrap);
- if (StrideSCEV && isa<SCEVConstant>(*StrideSCEV))
- return cast<SCEVConstant>(*StrideSCEV)->getAPInt().getSExtValue();
- return std::nullopt;
-}
-
std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
Type *ElemTyB, Value *PtrB,
const DataLayout &DL,
@@ -1986,43 +1967,30 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
// Need accesses with constant strides and the same direction for further
// dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
// similar code or pointer arithmetic that could wrap in the address space.
- //
- // If Src or Sink are non-wrapping AddRecs, StrideAPtr and StrideBPtr contain
- // a SCEV representing the stride of the SCEV. It may not be a known constant
- // value though.
- // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec), we
- // cannot analyze the dependence further.
+ // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and
+ // not loop-invariant (stride will be 0 in that case), we cannot analyze the
+ // dependence further and also cannot generate runtime checks.
if (!StrideAPtr || !StrideBPtr) {
+ LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
return MemoryDepChecker::Dependence::IndirectUnsafe;
}
- if (*StrideAPtr == 0 || StrideBPtr == 0) {
- bool SrcInvariant = SE.isLoopInvariant(Src, InnermostLoop);
- bool SinkInvariant = SE.isLoopInvariant(Sink, InnermostLoop);
- // If either Src or Sink are not loop invariant and not strided, the
- // expression in the current iteration may overlap with any earlier or later
- // iteration. This is not safe and we also cannot generate runtime checks to
- // ensure safety. This includes expressions where an index is loaded in each
- // iteration or wrapping AddRecs.
-
- // Otherwise both Src or Sink are either loop invariant or strided and we
- // can generate a runtime check to disambiguate the accesses.
+ int64_t StrideAPtrInt = *StrideAPtr;
+ int64_t StrideBPtrInt = *StrideBPtr;
+ LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt
+ << " Sink induction step: " << StrideBPtrInt << "\n");
+ // At least Src or Sink are loop invariant and the other is strided or
+ // invariant. We can generate a runtime check to disambiguate the accesses.
+ if (StrideAPtrInt == 0 || StrideBPtrInt == 0)
return MemoryDepChecker::Dependence::Unknown;
- }
-
- LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << *StrideAPtr
- << " Sink induction step: " << *StrideBPtr << "\n");
// Both Src and Sink have a constant stride, check if they are in the same
// direction.
- int64_t StrideAPtrInt =
- *StrideAPtr;
- int64_t StrideBPtrInt =
- *StrideBPtr;
if ((StrideAPtrInt > 0 && StrideBPtrInt < 0) ||
(StrideAPtrInt < 0 && StrideBPtrInt > 0)) {
- LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
+ LLVM_DEBUG(
+ dbgs() << "Pointer access with strides in different directions\n");
return MemoryDepChecker::Dependence::Unknown;
}