[llvm] 933f492 - [LAA] Support different strides & non constant dep distances using SCEV. (#88039)

via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 25 13:38:11 PDT 2024


Author: Florian Hahn
Date: 2024-04-25T21:38:07+01:00
New Revision: 933f49248bfede6b22d516baa5cf80bdf85c3c61

URL: https://github.com/llvm/llvm-project/commit/933f49248bfede6b22d516baa5cf80bdf85c3c61
DIFF: https://github.com/llvm/llvm-project/commit/933f49248bfede6b22d516baa5cf80bdf85c3c61.diff

LOG: [LAA] Support different strides & non constant dep distances using SCEV. (#88039)

Extend LoopAccessAnalysis to support different strides and as a
consequence non-constant distances between dependences using SCEV to
reason about the direction of the dependence.

In multiple places, logic to rule out dependences using the stride has
been updated to only be used if StrideA == StrideB, i.e. there's a
common stride.

We now also may bail out at multiple places where we may have to set
FoundNonConstantDistanceDependence. This is done when we need to bail
out and the distance is not constant to preserve original behavior.

Fixes https://github.com/llvm/llvm-project/issues/87336

PR: https://github.com/llvm/llvm-project/pull/88039

Added: 
    

Modified: 
    llvm/lib/Analysis/LoopAccessAnalysis.cpp
    llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
    llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll
    llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index b1ba8e7c0f6014..f65515ca387229 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1920,20 +1920,21 @@ isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
 namespace {
 struct DepDistanceStrideAndSizeInfo {
   const SCEV *Dist;
-  uint64_t Stride;
+  uint64_t StrideA;
+  uint64_t StrideB;
   uint64_t TypeByteSize;
   bool AIsWrite;
   bool BIsWrite;
 
-  DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t Stride,
-                               uint64_t TypeByteSize, bool AIsWrite,
-                               bool BIsWrite)
-      : Dist(Dist), Stride(Stride), TypeByteSize(TypeByteSize),
-        AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
+  DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t StrideA,
+                               uint64_t StrideB, uint64_t TypeByteSize,
+                               bool AIsWrite, bool BIsWrite)
+      : Dist(Dist), StrideA(StrideA), StrideB(StrideB),
+        TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
 };
 } // namespace
 
-// Get the dependence distance, stride, type size and whether it is a write for
+// Get the dependence distance, strides, type size and whether it is a write for
 // the dependence between A and B. Returns a DepType, if we can prove there's
 // no dependence or the analysis fails. Outlined to lambda to limit he scope
 // of various temporary variables, like A/BPtr, StrideA/BPtr and others.
@@ -1995,10 +1996,11 @@ getDependenceDistanceStrideAndSize(
                                    InnermostLoop))
     return MemoryDepChecker::Dependence::IndirectUnsafe;
 
-  // Need accesses with constant stride. We don't want to vectorize
-  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap
-  // in the address space.
-  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
+  // Need accesses with constant strides and the same direction. We don't want
+  // to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that
+  // could wrap in the address space.
+  if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) ||
+      (StrideAPtr < 0 && StrideBPtr > 0)) {
     LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
     return MemoryDepChecker::Dependence::Unknown;
   }
@@ -2008,9 +2010,9 @@ getDependenceDistanceStrideAndSize(
       DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
   if (!HasSameSize)
     TypeByteSize = 0;
-  uint64_t Stride = std::abs(StrideAPtr);
-  return DepDistanceStrideAndSizeInfo(Dist, Stride, TypeByteSize, AIsWrite,
-                                      BIsWrite);
+  return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr),
+                                      std::abs(StrideBPtr), TypeByteSize,
+                                      AIsWrite, BIsWrite);
 }
 
 MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
@@ -2028,41 +2030,63 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
   if (std::holds_alternative<Dependence::DepType>(Res))
     return std::get<Dependence::DepType>(Res);
 
-  const auto &[Dist, Stride, TypeByteSize, AIsWrite, BIsWrite] =
+  const auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] =
       std::get<DepDistanceStrideAndSizeInfo>(Res);
   bool HasSameSize = TypeByteSize > 0;
 
+  std::optional<uint64_t> CommonStride =
+      StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt;
+  if (isa<SCEVCouldNotCompute>(Dist)) {
+    // TODO: Relax requirement that there is a common stride to retry with
+    // non-constant distance dependencies.
+    FoundNonConstantDistanceDependence |= !!CommonStride;
+    LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n");
+    return Dependence::Unknown;
+  }
+
   ScalarEvolution &SE = *PSE.getSE();
   auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
+
   // If the distance between the acecsses is larger than their absolute stride
   // multiplied by the backedge taken count, the accesses are independet, i.e.
   // they are far enough appart that accesses won't access the same location
   // across all loop ierations.
-  if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
+  if (HasSameSize && CommonStride &&
       isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist,
-                               Stride, TypeByteSize))
+                               *CommonStride, TypeByteSize))
     return Dependence::NoDep;
 
   const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
-  if (!C) {
-    LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
-    FoundNonConstantDistanceDependence = true;
-    return Dependence::Unknown;
-  }
 
-  const APInt &Val = C->getAPInt();
-  int64_t Distance = Val.getSExtValue();
-
-  // If the distance between accesses and their strides are known constants,
-  // check whether the accesses interlace each other.
-  if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize &&
-      areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
-    LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
-    return Dependence::NoDep;
+  // Attempt to prove strided accesses independent.
+  if (C) {
+    const APInt &Val = C->getAPInt();
+    int64_t Distance = Val.getSExtValue();
+
+    // If the distance between accesses and their strides are known constants,
+    // check whether the accesses interlace each other.
+    if (std::abs(Distance) > 0 && CommonStride && *CommonStride > 1 &&
+        HasSameSize &&
+        areStridedAccessesIndependent(std::abs(Distance), *CommonStride,
+                                      TypeByteSize)) {
+      LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
+      return Dependence::NoDep;
+    }
   }
 
   // Negative distances are not plausible dependencies.
-  if (Val.isNegative()) {
+  if (SE.isKnownNonPositive(Dist)) {
+    if (SE.isKnownNonNegative(Dist)) {
+      if (HasSameSize) {
+        // Write to the same location with the same size.
+        return Dependence::Forward;
+      } else {
+        LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence 
diff erence but "
+                             "
diff erent type sizes\n");
+        return Dependence::Unknown;
+      }
+    }
+
     bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
     // Check if the first access writes to a location that is read in a later
     // iteration, where the distance between them is not a multiple of a vector
@@ -2071,27 +2095,40 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
     // NOTE: There is no need to update MaxSafeVectorWidthInBits after call to
     // couldPreventStoreLoadForward, even if it changed MinDepDistBytes, since a
     // forward dependency will allow vectorization using any width.
-    if (IsTrueDataDependence && EnableForwardingConflictDetection &&
-        (!HasSameSize || couldPreventStoreLoadForward(Val.abs().getZExtValue(),
-                                                      TypeByteSize))) {
-      LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
-      return Dependence::ForwardButPreventsForwarding;
+
+    if (IsTrueDataDependence && EnableForwardingConflictDetection) {
+      if (!C) {
+        // TODO: FoundNonConstantDistanceDependence is used as a necessary
+        // condition to consider retrying with runtime checks. Historically, we
+        // did not set it when strides were 
diff erent but there is no inherent
+        // reason to.
+        FoundNonConstantDistanceDependence |= CommonStride.has_value();
+        return Dependence::Unknown;
+      }
+      if (!HasSameSize ||
+          couldPreventStoreLoadForward(C->getAPInt().abs().getZExtValue(),
+                                       TypeByteSize)) {
+        LLVM_DEBUG(
+            dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
+        return Dependence::ForwardButPreventsForwarding;
+      }
     }
 
     LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
     return Dependence::Forward;
   }
 
-  // Write to the same location with the same size.
-  if (Val == 0) {
-    if (HasSameSize)
-      return Dependence::Forward;
-    LLVM_DEBUG(
-        dbgs() << "LAA: Zero dependence 
diff erence but 
diff erent type sizes\n");
+  if (!C) {
+    // TODO: FoundNonConstantDistanceDependence is used as a necessary condition
+    // to consider retrying with runtime checks. Historically, we did not set it
+    // when strides were 
diff erent but there is no inherent reason to.
+    FoundNonConstantDistanceDependence |= CommonStride.has_value();
+    LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
     return Dependence::Unknown;
   }
 
-  assert(Val.isStrictlyPositive() && "Expect a positive value");
+  if (!SE.isKnownPositive(Dist))
+    return Dependence::Unknown;
 
   if (!HasSameSize) {
     LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
@@ -2099,6 +2136,14 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
     return Dependence::Unknown;
   }
 
+  // The logic below currently only supports StrideA ==  StrideB, i.e. there's a
+  // common stride.
+  if (!CommonStride)
+    return Dependence::Unknown;
+
+  const APInt &Val = C->getAPInt();
+  int64_t Distance = Val.getSExtValue();
+
   // Bail out early if passed-in parameters make vectorization not feasible.
   unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
                            VectorizerParams::VectorizationFactor : 1);
@@ -2134,7 +2179,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
   // the minimum distance needed is 28, which is greater than distance. It is
   // not safe to do vectorization.
   uint64_t MinDistanceNeeded =
-      TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
+      TypeByteSize * (*CommonStride) * (MinNumIter - 1) + TypeByteSize;
   if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
     LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
                       << Distance << '\n');
@@ -2183,7 +2228,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
 
   // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
   // since there is a backwards dependency.
-  uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * Stride);
+  uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * (*CommonStride));
   LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
                     << " with max VF = " << MaxVF << '\n');
   uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;

diff  --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index edddfb1b92402f..059900f357e64b 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -126,8 +126,10 @@ struct StoreToLoadForwardingCandidate {
 
     // We don't need to check non-wrapping here because forward/backward
     // dependence wouldn't be valid if these weren't monotonic accesses.
-    auto *Dist = cast<SCEVConstant>(
+    auto *Dist = dyn_cast<SCEVConstant>(
         PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
+    if (!Dist)
+      return false;
     const APInt &Val = Dist->getAPInt();
     return Val == TypeByteSize * StrideLoad;
   }

diff  --git a/llvm/test/Analysis/LoopAccessAnalysis/
diff erent-strides-safe-dep-due-to-backedge-taken-count.ll b/llvm/test/Analysis/LoopAccessAnalysis/
diff erent-strides-safe-dep-due-to-backedge-taken-count.ll
index 932129bbb957fa..5312c36e436a21 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/
diff erent-strides-safe-dep-due-to-backedge-taken-count.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/
diff erent-strides-safe-dep-due-to-backedge-taken-count.ll
@@ -6,10 +6,9 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 define void @forward_dep_known_safe_due_to_backedge_taken_count(ptr %A) {
 ; CHECK-LABEL: 'forward_dep_known_safe_due_to_backedge_taken_count'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Memory dependences are safe
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:        Forward:
 ; CHECK-NEXT:            %l = load i32, ptr %gep.mul.2, align 4 ->
 ; CHECK-NEXT:            store i32 %add, ptr %gep, align 4
 ; CHECK-EMPTY:
@@ -44,10 +43,9 @@ exit:
 define void @forward_dep_not_known_safe_due_to_backedge_taken_count(ptr %A) {
 ; CHECK-LABEL: 'forward_dep_not_known_safe_due_to_backedge_taken_count'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Memory dependences are safe
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:        Forward:
 ; CHECK-NEXT:            %l = load i32, ptr %gep.mul.2, align 4 ->
 ; CHECK-NEXT:            store i32 %add, ptr %gep, align 4
 ; CHECK-EMPTY:

diff  --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll
index 51755314896bb3..5f4c732dc19df0 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll
@@ -8,10 +8,9 @@ declare void @llvm.assume(i1)
 define void @
diff erent_non_constant_strides_known_forward(ptr %A) {
 ; CHECK-LABEL: '
diff erent_non_constant_strides_known_forward'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Memory dependences are safe
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:        Forward:
 ; CHECK-NEXT:            %l = load i32, ptr %gep.mul.2, align 4 ->
 ; CHECK-NEXT:            store i32 %add, ptr %gep, align 4
 ; CHECK-EMPTY:
@@ -45,10 +44,9 @@ exit:
 define void @
diff erent_non_constant_strides_known_forward_min_distance_3(ptr %A) {
 ; CHECK-LABEL: '
diff erent_non_constant_strides_known_forward_min_distance_3'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Memory dependences are safe
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:        Forward:
 ; CHECK-NEXT:            %l = load i32, ptr %gep.mul.2, align 4 ->
 ; CHECK-NEXT:            store i32 %add, ptr %gep, align 4
 ; CHECK-EMPTY:


        


More information about the llvm-commits mailing list