[llvm] 78777a2 - [LV]Split store-load forward distance analysis from other checks, NFC (#121156)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 31 04:28:48 PDT 2025
Author: Alexey Bataev
Date: 2025-03-31T07:28:44-04:00
New Revision: 78777a204ad9a3f17f04f90040f88855f47aa50f
URL: https://github.com/llvm/llvm-project/commit/78777a204ad9a3f17f04f90040f88855f47aa50f
DIFF: https://github.com/llvm/llvm-project/commit/78777a204ad9a3f17f04f90040f88855f47aa50f.diff
LOG: [LV]Split store-load forward distance analysis from other checks, NFC (#121156)
This patch splits the store-load forwarding distance analysis from the other
dependency analysis in LAA. Currently only power-of-2 distances are supported;
the split is required to support non-power-of-2 distances in the future.
Part of #100755
Added:
Modified:
llvm/include/llvm/Analysis/LoopAccessAnalysis.h
llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
llvm/lib/Analysis/LoopAccessAnalysis.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll
llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index cb6f47e3a76be..f715e0ec8dbb4 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -216,6 +216,21 @@ class MemoryDepChecker {
return MaxSafeVectorWidthInBits;
}
+ /// Return true if there are no store-load forwarding dependencies.
+ bool isSafeForAnyStoreLoadForwardDistances() const {
+ return MaxStoreLoadForwardSafeDistanceInBits ==
+ std::numeric_limits<uint64_t>::max();
+ }
+
+ /// Return safe power-of-2 number of elements, which do not prevent store-load
+ /// forwarding, multiplied by the size of the elements in bits.
+ uint64_t getStoreLoadForwardSafeDistanceInBits() const {
+ assert(!isSafeForAnyStoreLoadForwardDistances() &&
+ "Expected the distance, that prevent store-load forwarding, to be "
+ "set.");
+ return MaxStoreLoadForwardSafeDistanceInBits;
+ }
+
/// In same cases when the dependency check fails we can still
/// vectorize the loop with a dynamic array access check.
bool shouldRetryWithRuntimeCheck() const {
@@ -304,6 +319,11 @@ class MemoryDepChecker {
/// restrictive.
uint64_t MaxSafeVectorWidthInBits = -1U;
+ /// Maximum power-of-2 number of elements, which do not prevent store-load
+ /// forwarding, multiplied by the size of the elements in bits.
+ uint64_t MaxStoreLoadForwardSafeDistanceInBits =
+ std::numeric_limits<uint64_t>::max();
+
/// If we see a non-constant dependence distance we can still try to
/// vectorize this loop with runtime checks.
bool FoundNonConstantDistanceDependence = false;
@@ -357,7 +377,8 @@ class MemoryDepChecker {
///
/// \return false if we shouldn't vectorize at all or avoid larger
/// vectorization factors by limiting MinDepDistBytes.
- bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
+ bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize,
+ unsigned CommonStride = 0);
/// Updates the current safety status with \p S. We can go from Safe to
/// either PossiblySafeWithRtChecks or Unsafe and from
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index c3a04f9373dbe..d654ac3ec9273 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -382,7 +382,8 @@ class LoopVectorizationLegality {
const LoopAccessInfo *getLAI() const { return LAI; }
bool isSafeForAnyVectorWidth() const {
- return LAI->getDepChecker().isSafeForAnyVectorWidth();
+ return LAI->getDepChecker().isSafeForAnyVectorWidth() &&
+ LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
}
uint64_t getMaxSafeVectorWidthInBits() const {
@@ -406,6 +407,17 @@ class LoopVectorizationLegality {
return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr;
}
+ /// Return true if there are no store-load forwarding dependencies.
+ bool isSafeForAnyStoreLoadForwardDistances() const {
+ return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
+ }
+
+ /// Return safe power-of-2 number of elements, which do not prevent store-load
+ /// forwarding and safe to operate simultaneously.
+ uint64_t getMaxStoreLoadForwardSafeDistanceInBits() const {
+ return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits();
+ }
+
/// Returns true if vector representation of the instruction \p I
/// requires mask.
bool isMaskRequired(const Instruction *I) const {
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 7f1b5dc3890a9..dd7b796fd0fdf 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1740,7 +1740,8 @@ bool MemoryDepChecker::Dependence::isForward() const {
}
bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
- uint64_t TypeByteSize) {
+ uint64_t TypeByteSize,
+ unsigned CommonStride) {
// If loads occur at a distance that is not a multiple of a feasible vector
// factor store-load forwarding does not take place.
// Positive dependences might cause troubles because vectorizing them might
@@ -1755,31 +1756,38 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
// cause any slowdowns.
const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
// Maximum vector factor.
- uint64_t MaxVFWithoutSLForwardIssues = std::min(
- VectorizerParams::MaxVectorWidth * TypeByteSize, MinDepDistBytes);
+ uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =
+ std::min(VectorizerParams::MaxVectorWidth * TypeByteSize,
+ MaxStoreLoadForwardSafeDistanceInBits);
// Compute the smallest VF at which the store and load would be misaligned.
- for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
- VF *= 2) {
+ for (uint64_t VF = 2 * TypeByteSize;
+ VF <= MaxVFWithoutSLForwardIssuesPowerOf2; VF *= 2) {
// If the number of vector iteration between the store and the load are
// small we could incur conflicts.
if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
- MaxVFWithoutSLForwardIssues = (VF >> 1);
+ MaxVFWithoutSLForwardIssuesPowerOf2 = (VF >> 1);
break;
}
}
- if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
+ if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) {
LLVM_DEBUG(
dbgs() << "LAA: Distance " << Distance
<< " that could cause a store-load forwarding conflict\n");
return true;
}
- if (MaxVFWithoutSLForwardIssues < MinDepDistBytes &&
- MaxVFWithoutSLForwardIssues !=
- VectorizerParams::MaxVectorWidth * TypeByteSize)
- MinDepDistBytes = MaxVFWithoutSLForwardIssues;
+ if (CommonStride &&
+ MaxVFWithoutSLForwardIssuesPowerOf2 <
+ MaxStoreLoadForwardSafeDistanceInBits &&
+ MaxVFWithoutSLForwardIssuesPowerOf2 !=
+ VectorizerParams::MaxVectorWidth * TypeByteSize) {
+ uint64_t MaxVF = MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride;
+ uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
+ MaxStoreLoadForwardSafeDistanceInBits =
+ std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);
+ }
return false;
}
@@ -2227,20 +2235,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
std::min(static_cast<uint64_t>(MinDistance), MinDepDistBytes);
bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
- uint64_t MinDepDistBytesOld = MinDepDistBytes;
if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist &&
- couldPreventStoreLoadForward(MinDistance, TypeByteSize)) {
- // Sanity check that we didn't update MinDepDistBytes when calling
- // couldPreventStoreLoadForward
- assert(MinDepDistBytes == MinDepDistBytesOld &&
- "An update to MinDepDistBytes requires an update to "
- "MaxSafeVectorWidthInBits");
- (void)MinDepDistBytesOld;
+ couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride))
return Dependence::BackwardVectorizableButPreventsForwarding;
- }
- // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
- // since there is a backwards dependency.
uint64_t MaxVF = MinDepDistBytes / *CommonStride;
LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
<< " with max VF = " << MaxVF << '\n');
@@ -3005,6 +3003,11 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
if (!DC.isSafeForAnyVectorWidth())
OS << " with a maximum safe vector width of "
<< DC.getMaxSafeVectorWidthInBits() << " bits";
+ if (!DC.isSafeForAnyStoreLoadForwardDistances()) {
+ uint64_t SLDist = DC.getStoreLoadForwardSafeDistanceInBits();
+ OS << ", with a maximum safe store-load forward width of " << SLDist
+ << " bits";
+ }
if (PtrRtChecking->Need)
OS << " with run-time checks";
OS << "\n";
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index dd392056a07ee..c3520dc95f8b4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3815,13 +3815,18 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
// It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
// the memory accesses that is most restrictive (involved in the smallest
// dependence distance).
- unsigned MaxSafeElements =
- llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
+ unsigned MaxSafeElementsPowerOf2 =
+ bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
+ if (!Legal->isSafeForAnyStoreLoadForwardDistances()) {
+ unsigned SLDist = Legal->getMaxStoreLoadForwardSafeDistanceInBits();
+ MaxSafeElementsPowerOf2 =
+ std::min(MaxSafeElementsPowerOf2, SLDist / WidestType);
+ }
+ auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElementsPowerOf2);
+ auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElementsPowerOf2);
- auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElements);
- auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElements);
if (!Legal->isSafeForAnyVectorWidth())
- this->MaxSafeElements = MaxSafeElements;
+ this->MaxSafeElements = MaxSafeElementsPowerOf2;
LLVM_DEBUG(dbgs() << "LV: The max safe fixed VF is: " << MaxSafeFixedVF
<< ".\n");
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll
index efa3100464759..8e249b36f6445 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll
@@ -4,7 +4,7 @@
; for (i = 0; i < n; i++)
; A[i + 4] = A[i] * 2;
-; CHECK: Memory dependences are safe with a maximum safe vector width of 64 bits
+; CHECK: Memory dependences are safe with a maximum safe vector width of 64 bits, with a maximum safe store-load forward width of 64 bits
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll b/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll
index ef19e173b6599..335ad67faee04 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll
@@ -276,7 +276,7 @@ for.body: ; preds = %entry, %for.body
define void @vectorizable_Read_Write(ptr nocapture %A) {
; CHECK-LABEL: 'vectorizable_Read_Write'
; CHECK-NEXT: for.body:
-; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 64 bits
+; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 64 bits, with a maximum safe store-load forward width of 64 bits
; CHECK-NEXT: Dependences:
; CHECK-NEXT: BackwardVectorizable:
; CHECK-NEXT: %0 = load i32, ptr %arrayidx, align 4 ->
More information about the llvm-commits
mailing list