[llvm] [LAA] Pass maximum stride to isSafeDependenceDistance. (PR #90036)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 25 13:46:11 PDT 2024
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/90036
>From fcec466cf4a263f267550d6982f0b136503ea46d Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 25 Apr 2024 09:47:29 +0100
Subject: [PATCH 1/2] [LAA] Support different strides & non constant dep
distances using SCEV. #88039
---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 134 ++++++++++++------
.../Transforms/Scalar/LoopLoadElimination.cpp | 4 +-
.../non-constant-strides-forward.ll | 10 +-
3 files changed, 95 insertions(+), 53 deletions(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index b1ba8e7c0f6014..ed8e749f9d6c2d 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1920,20 +1920,21 @@ isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
namespace {
struct DepDistanceStrideAndSizeInfo {
const SCEV *Dist;
- uint64_t Stride;
+ uint64_t StrideA;
+ uint64_t StrideB;
uint64_t TypeByteSize;
bool AIsWrite;
bool BIsWrite;
- DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t Stride,
- uint64_t TypeByteSize, bool AIsWrite,
- bool BIsWrite)
- : Dist(Dist), Stride(Stride), TypeByteSize(TypeByteSize),
- AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
+ DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t StrideA,
+ uint64_t StrideB, uint64_t TypeByteSize,
+ bool AIsWrite, bool BIsWrite)
+ : Dist(Dist), StrideA(StrideA), StrideB(StrideB),
+ TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
};
} // namespace
-// Get the dependence distance, stride, type size and whether it is a write for
+// Get the dependence distance, strides, type size and whether it is a write for
// the dependence between A and B. Returns a DepType, if we can prove there's
// no dependence or the analysis fails. Outlined to lambda to limit he scope
// of various temporary variables, like A/BPtr, StrideA/BPtr and others.
@@ -1995,10 +1996,11 @@ getDependenceDistanceStrideAndSize(
InnermostLoop))
return MemoryDepChecker::Dependence::IndirectUnsafe;
- // Need accesses with constant stride. We don't want to vectorize
- // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap
- // in the address space.
- if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
+ // Need accesses with constant strides and the same direction. We don't want
+ // to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that
+ // could wrap in the address space.
+ if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) ||
+ (StrideAPtr < 0 && StrideBPtr > 0)) {
LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
return MemoryDepChecker::Dependence::Unknown;
}
@@ -2008,9 +2010,9 @@ getDependenceDistanceStrideAndSize(
DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
if (!HasSameSize)
TypeByteSize = 0;
- uint64_t Stride = std::abs(StrideAPtr);
- return DepDistanceStrideAndSizeInfo(Dist, Stride, TypeByteSize, AIsWrite,
- BIsWrite);
+ return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr),
+ std::abs(StrideBPtr), TypeByteSize,
+ AIsWrite, BIsWrite);
}
MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
@@ -2028,41 +2030,63 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
if (std::holds_alternative<Dependence::DepType>(Res))
return std::get<Dependence::DepType>(Res);
- const auto &[Dist, Stride, TypeByteSize, AIsWrite, BIsWrite] =
+ const auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] =
std::get<DepDistanceStrideAndSizeInfo>(Res);
bool HasSameSize = TypeByteSize > 0;
+ std::optional<uint64_t> CommonStride =
+ StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt;
+ if (isa<SCEVCouldNotCompute>(Dist)) {
+ // TODO: Relax requirement that there is a common stride to retry with
+ // non-constant distance dependencies.
+ FoundNonConstantDistanceDependence |= !!CommonStride;
+ LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n");
+ return Dependence::Unknown;
+ }
+
ScalarEvolution &SE = *PSE.getSE();
auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
+
// If the distance between the acecsses is larger than their absolute stride
// multiplied by the backedge taken count, the accesses are independet, i.e.
// they are far enough appart that accesses won't access the same location
// across all loop ierations.
- if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
+ if (HasSameSize && CommonStride &&
isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist,
- Stride, TypeByteSize))
+ *CommonStride, TypeByteSize))
return Dependence::NoDep;
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
- if (!C) {
- LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
- FoundNonConstantDistanceDependence = true;
- return Dependence::Unknown;
- }
- const APInt &Val = C->getAPInt();
- int64_t Distance = Val.getSExtValue();
-
- // If the distance between accesses and their strides are known constants,
- // check whether the accesses interlace each other.
- if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize &&
- areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
- LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
- return Dependence::NoDep;
+ // Attempt to prove strided accesses independent.
+ if (C) {
+ const APInt &Val = C->getAPInt();
+ int64_t Distance = Val.getSExtValue();
+
+ // If the distance between accesses and their strides are known constants,
+ // check whether the accesses interlace each other.
+ if (std::abs(Distance) > 0 && CommonStride && *CommonStride > 1 &&
+ HasSameSize &&
+ areStridedAccessesIndependent(std::abs(Distance), *CommonStride,
+ TypeByteSize)) {
+ LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
+ return Dependence::NoDep;
+ }
}
// Negative distances are not plausible dependencies.
- if (Val.isNegative()) {
+ if (SE.isKnownNonPositive(Dist)) {
+ if (SE.isKnownNonNegative(Dist)) {
+ if (HasSameSize) {
+ // Write to the same location with the same size.
+ return Dependence::Forward;
+ } else {
+ LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
+ "different type sizes\n");
+ return Dependence::Unknown;
+ }
+ }
+
bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
// Check if the first access writes to a location that is read in a later
// iteration, where the distance between them is not a multiple of a vector
@@ -2071,27 +2095,37 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
// NOTE: There is no need to update MaxSafeVectorWidthInBits after call to
// couldPreventStoreLoadForward, even if it changed MinDepDistBytes, since a
// forward dependency will allow vectorization using any width.
- if (IsTrueDataDependence && EnableForwardingConflictDetection &&
- (!HasSameSize || couldPreventStoreLoadForward(Val.abs().getZExtValue(),
- TypeByteSize))) {
- LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
- return Dependence::ForwardButPreventsForwarding;
+
+ if (IsTrueDataDependence && EnableForwardingConflictDetection) {
+ if (!C) {
+ // TODO: Relax requirement that there is a common stride to retry with
+ // non-constant distance dependencies.
+ FoundNonConstantDistanceDependence |= !!CommonStride;
+ return Dependence::Unknown;
+ }
+ if (!HasSameSize ||
+ couldPreventStoreLoadForward(C->getAPInt().abs().getZExtValue(),
+ TypeByteSize)) {
+ LLVM_DEBUG(
+ dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
+ return Dependence::ForwardButPreventsForwarding;
+ }
}
LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
return Dependence::Forward;
}
- // Write to the same location with the same size.
- if (Val == 0) {
- if (HasSameSize)
- return Dependence::Forward;
- LLVM_DEBUG(
- dbgs() << "LAA: Zero dependence difference but different type sizes\n");
+ if (!C) {
+ // TODO: Relax requirement that there is a common stride to retry with
+ // non-constant distance dependencies.
+ FoundNonConstantDistanceDependence |= !!CommonStride;
+ LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
return Dependence::Unknown;
}
- assert(Val.isStrictlyPositive() && "Expect a positive value");
+ if (!SE.isKnownPositive(Dist))
+ return Dependence::Unknown;
if (!HasSameSize) {
LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
@@ -2099,6 +2133,14 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
return Dependence::Unknown;
}
+ // The logic below currently only supports StrideA == StrideB, i.e. there's a
+ // common stride.
+ if (!CommonStride)
+ return Dependence::Unknown;
+
+ const APInt &Val = C->getAPInt();
+ int64_t Distance = Val.getSExtValue();
+
// Bail out early if passed-in parameters make vectorization not feasible.
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
VectorizerParams::VectorizationFactor : 1);
@@ -2134,7 +2176,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
// the minimum distance needed is 28, which is greater than distance. It is
// not safe to do vectorization.
uint64_t MinDistanceNeeded =
- TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
+ TypeByteSize * (*CommonStride) * (MinNumIter - 1) + TypeByteSize;
if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
<< Distance << '\n');
@@ -2183,7 +2225,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
// An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
// since there is a backwards dependency.
- uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * Stride);
+ uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * (*CommonStride));
LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
<< " with max VF = " << MaxVF << '\n');
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index edddfb1b92402f..059900f357e64b 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -126,8 +126,10 @@ struct StoreToLoadForwardingCandidate {
// We don't need to check non-wrapping here because forward/backward
// dependence wouldn't be valid if these weren't monotonic accesses.
- auto *Dist = cast<SCEVConstant>(
+ auto *Dist = dyn_cast<SCEVConstant>(
PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
+ if (!Dist)
+ return false;
const APInt &Val = Dist->getAPInt();
return Val == TypeByteSize * StrideLoad;
}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll
index 51755314896bb3..5f4c732dc19df0 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll
@@ -8,10 +8,9 @@ declare void @llvm.assume(i1)
define void @different_non_constant_strides_known_forward(ptr %A) {
; CHECK-LABEL: 'different_non_constant_strides_known_forward'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: Unknown:
+; CHECK-NEXT: Forward:
; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 ->
; CHECK-NEXT: store i32 %add, ptr %gep, align 4
; CHECK-EMPTY:
@@ -45,10 +44,9 @@ exit:
define void @different_non_constant_strides_known_forward_min_distance_3(ptr %A) {
; CHECK-LABEL: 'different_non_constant_strides_known_forward_min_distance_3'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: Unknown:
+; CHECK-NEXT: Forward:
; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 ->
; CHECK-NEXT: store i32 %add, ptr %gep, align 4
; CHECK-EMPTY:
>From 54a94b7c3173ae334c4dbc555f0b35ac7cd05636 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 25 Apr 2024 10:34:57 +0100
Subject: [PATCH 2/2] [LAA] Pass maximum stride to isSafeDependenceDistance.
As discussed in https://github.com/llvm/llvm-project/pull/88039, support
different strides with isSafeDependenceDistance by passing the maximum
of both strides.
isSafeDependenceDistance tries to prove that
|Dist| > BackedgeTakenCount * Step
holds. Chosing the maximum stride computes the maximum range accesed by
the loop for all strides.
---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 31 ++++++++++---------
...es-safe-dep-due-to-backedge-taken-count.ll | 19 +++---------
2 files changed, 20 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index ed8e749f9d6c2d..1015cc77890cdf 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1805,20 +1805,20 @@ void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
}
/// Given a dependence-distance \p Dist between two
-/// memory accesses, that have the same stride whose absolute value is given
-/// in \p Stride, and that have the same type size \p TypeByteSize,
-/// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
-/// possible to prove statically that the dependence distance is larger
-/// than the range that the accesses will travel through the execution of
-/// the loop. If so, return true; false otherwise. This is useful for
-/// example in loops such as the following (PR31098):
+/// memory accesses, that have strides in the same direction whose absolute
+/// value of the maximum stride is given in \p MaxStride, and that have the same
+/// type size \p TypeByteSize, in a loop whose takenCount is \p
+/// BackedgeTakenCount, check if it is possible to prove statically that the
+/// dependence distance is larger than the range that the accesses will travel
+/// through the execution of the loop. If so, return true; false otherwise. This
+/// is useful for example in loops such as the following (PR31098):
/// for (i = 0; i < D; ++i) {
/// = out[i];
/// out[i+D] =
/// }
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
const SCEV &BackedgeTakenCount,
- const SCEV &Dist, uint64_t Stride,
+ const SCEV &Dist, uint64_t MaxStride,
uint64_t TypeByteSize) {
// If we can prove that
@@ -1838,7 +1838,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
// will be executed only if LoopCount >= VF, proving distance >= LoopCount
// also guarantees that distance >= VF.
//
- const uint64_t ByteStride = Stride * TypeByteSize;
+ const uint64_t ByteStride = MaxStride * TypeByteSize;
const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
@@ -2046,14 +2046,15 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
ScalarEvolution &SE = *PSE.getSE();
auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
+ uint64_t MaxStride = std::max(StrideA, StrideB);
- // If the distance between the acecsses is larger than their absolute stride
- // multiplied by the backedge taken count, the accesses are independet, i.e.
- // they are far enough appart that accesses won't access the same location
- // across all loop ierations.
- if (HasSameSize && CommonStride &&
+ // If the distance between the acecsses is larger than their maximum absolute
+ // stride multiplied by the backedge taken count, the accesses are independet,
+ // i.e. they are far enough appart that accesses won't access the same
+ // location across all loop ierations.
+ if (HasSameSize &&
isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist,
- *CommonStride, TypeByteSize))
+ MaxStride, TypeByteSize))
return Dependence::NoDep;
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll b/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll
index 932129bbb957fa..8c7df4bdf5a5a8 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll
@@ -6,13 +6,8 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
define void @forward_dep_known_safe_due_to_backedge_taken_count(ptr %A) {
; CHECK-LABEL: 'forward_dep_known_safe_due_to_backedge_taken_count'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: Unknown:
-; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 ->
-; CHECK-NEXT: store i32 %add, ptr %gep, align 4
-; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
@@ -44,10 +39,9 @@ exit:
define void @forward_dep_not_known_safe_due_to_backedge_taken_count(ptr %A) {
; CHECK-LABEL: 'forward_dep_not_known_safe_due_to_backedge_taken_count'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: Unknown:
+; CHECK-NEXT: Forward:
; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 ->
; CHECK-NEXT: store i32 %add, ptr %gep, align 4
; CHECK-EMPTY:
@@ -82,13 +76,8 @@ exit:
define void @unknown_dep_known_safe_due_to_backedge_taken_count(ptr %A) {
; CHECK-LABEL: 'unknown_dep_known_safe_due_to_backedge_taken_count'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: Unknown:
-; CHECK-NEXT: %l = load i32, ptr %gep, align 4 ->
-; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4
-; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
More information about the llvm-commits
mailing list