[llvm] [LAA] Hoist setting condition for RT-checks (NFC) (PR #128045)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 28 09:53:31 PDT 2025


https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/128045

>From 35c2a0f587b9d7c6c9c64e036adbbec94406edcf Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 20 Feb 2025 16:37:03 +0000
Subject: [PATCH] [LAA] hoist setting condition for rt-checks (NFC)

Strip ShouldRetyWithRuntimeCheck from the
DepedenceDistanceStrideAndSizeInfo struct, and free isDependent from the
responsibility of setting the condition for when runtime-checks are
needed, transferring this responsibility to
getDependenceDistanceStrideAndSize. The change is simple enough, but
proving that this was the original intent of the code, making the change
non-functional is non-trivial.

The proof is as follows. The condition for runtime-checks is only
respected when we return a Dependence::DepType of Dependence::Unknown,
and the only possible place that could introduce a functional change
with this patch is when Dist is possibly zero, but HasSameSize is false.
Previously, we weren't checking that the Dist is not a SCEVConstant, and
setting the runtime-check condition. Now, with the runtime-check
condition hoisted, we do. We note that for a functional change to occur,
the distance must be both non-constant, not provably non-positive and
non-negative, and this is an impossibility.
---
 .../llvm/Analysis/LoopAccessAnalysis.h        |  4 --
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      | 42 ++++---------------
 ...untime-checks-after-dependence-analysis.ll | 40 ++++++++++--------
 3 files changed, 32 insertions(+), 54 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index cb6f47e3a76be..b724078346903 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -372,8 +372,6 @@ class MemoryDepChecker {
     uint64_t MaxStride;
     std::optional<uint64_t> CommonStride;
 
-    bool ShouldRetryWithRuntimeCheck;
-
     /// TypeByteSize is either the common store size of both accesses, or 0 when
     /// store sizes mismatch.
     uint64_t TypeByteSize;
@@ -383,11 +381,9 @@ class MemoryDepChecker {
 
     DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t MaxStride,
                                  std::optional<uint64_t> CommonStride,
-                                 bool ShouldRetryWithRuntimeCheck,
                                  uint64_t TypeByteSize, bool AIsWrite,
                                  bool BIsWrite)
         : Dist(Dist), MaxStride(MaxStride), CommonStride(CommonStride),
-          ShouldRetryWithRuntimeCheck(ShouldRetryWithRuntimeCheck),
           TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
   };
 
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 57a76bc7a81e5..a225bf79486ef 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2009,14 +2009,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
   if (StrideAScaled == StrideBScaled)
     CommonStride = StrideAScaled;
 
-  // TODO: Historically, we don't retry with runtime checks unless the
-  // (unscaled) strides are the same. Fix this once the condition for runtime
-  // checks in isDependent is fixed.
-  bool ShouldRetryWithRuntimeCheck = StrideAPtrInt == StrideBPtrInt;
+  // TODO: FoundNonConstantDistanceDependence is used as a necessary condition
+  // to consider retrying with runtime checks. Historically, we did not set it
+  // when (unscaled) strides were different but there is no inherent reason to.
+  if (!isa<SCEVConstant>(Dist))
+    FoundNonConstantDistanceDependence |= StrideAPtrInt == StrideBPtrInt;
 
   return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,
-                                      ShouldRetryWithRuntimeCheck, TypeByteSize,
-                                      AIsWrite, BIsWrite);
+                                      TypeByteSize, AIsWrite, BIsWrite);
 }
 
 MemoryDepChecker::Dependence::DepType
@@ -2031,15 +2031,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   if (std::holds_alternative<Dependence::DepType>(Res))
     return std::get<Dependence::DepType>(Res);
 
-  auto &[Dist, MaxStride, CommonStride, ShouldRetryWithRuntimeCheck,
-         TypeByteSize, AIsWrite, BIsWrite] =
+  auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =
       std::get<DepDistanceStrideAndSizeInfo>(Res);
   bool HasSameSize = TypeByteSize > 0;
 
   if (isa<SCEVCouldNotCompute>(Dist)) {
-    // TODO: Relax requirement that there is a common unscaled stride to retry
-    // with non-constant distance dependencies.
-    FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
     LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n");
     return Dependence::Unknown;
   }
@@ -2099,14 +2095,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
     // forward dependency will allow vectorization using any width.
 
     if (IsTrueDataDependence && EnableForwardingConflictDetection) {
-      if (!ConstDist) {
-        // TODO: FoundNonConstantDistanceDependence is used as a necessary
-        // condition to consider retrying with runtime checks. Historically, we
-        // did not set it when strides were different but there is no inherent
-        // reason to.
-        FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
+      if (!ConstDist)
         return Dependence::Unknown;
-      }
       if (!HasSameSize ||
           couldPreventStoreLoadForward(
               ConstDist->getAPInt().abs().getZExtValue(), TypeByteSize)) {
@@ -2122,22 +2112,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
 
   int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue();
   // Below we only handle strictly positive distances.
-  if (MinDistance <= 0) {
-    FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
+  if (MinDistance <= 0)
     return Dependence::Unknown;
-  }
-
-  if (!ConstDist) {
-    // Previously this case would be treated as Unknown, possibly setting
-    // FoundNonConstantDistanceDependence to force re-trying with runtime
-    // checks. Until the TODO below is addressed, set it here to preserve
-    // original behavior w.r.t. re-trying with runtime checks.
-    // TODO: FoundNonConstantDistanceDependence is used as a necessary
-    // condition to consider retrying with runtime checks. Historically, we
-    // did not set it when strides were different but there is no inherent
-    // reason to.
-    FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
-  }
 
   if (!HasSameSize) {
     LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
index 114637ea3bf06..eea4a4a6468b0 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
@@ -320,28 +320,34 @@ exit:
 define void @retry_after_dep_check_with_unknown_offset(ptr %A, i32 %offset) {
 ; CHECK-LABEL: 'retry_after_dep_check_with_unknown_offset'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Unknown:
-; CHECK-NEXT:            %l.A = load float, ptr %A, align 4 ->
-; CHECK-NEXT:            store float 0.000000e+00, ptr %A.100.iv.offset.3, align 4
-; CHECK-EMPTY:
-; CHECK-NEXT:        Unknown:
-; CHECK-NEXT:            %l.A = load float, ptr %A, align 4 ->
-; CHECK-NEXT:            store float %l.A, ptr %A.100.iv, align 8
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group ([[GRP16:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %A.100.iv = getelementptr { float, float }, ptr %A.100, i64 %iv
+; CHECK-NEXT:        Against group ([[GRP17:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %A.100.iv.offset.3 = getelementptr i8, ptr %A.100, i64 %iv.offset.3
+; CHECK-NEXT:      Check 1:
+; CHECK-NEXT:        Comparing group ([[GRP16]]):
+; CHECK-NEXT:          %A.100.iv = getelementptr { float, float }, ptr %A.100, i64 %iv
+; CHECK-NEXT:        Against group ([[GRP18:0x[0-9a-f]+]]):
+; CHECK-NEXT:        ptr %A
+; CHECK-NEXT:      Check 2:
+; CHECK-NEXT:        Comparing group ([[GRP17]]):
+; CHECK-NEXT:          %A.100.iv.offset.3 = getelementptr i8, ptr %A.100, i64 %iv.offset.3
+; CHECK-NEXT:        Against group ([[GRP18]]):
+; CHECK-NEXT:        ptr %A
 ; CHECK-NEXT:      Grouped accesses:
-; CHECK-NEXT:        Group [[GRP16:0x[0-9a-f]+]]:
-; CHECK-NEXT:          (Low: %A High: (4 + %A))
-; CHECK-NEXT:            Member: %A
-; CHECK-NEXT:        Group [[GRP17:0x[0-9a-f]+]]:
-; CHECK-NEXT:          (Low: (100 + (8 * (zext i32 %offset to i64))<nuw><nsw> + %A) High: (96 + (16 * (zext i32 %offset to i64))<nuw><nsw> + %A))
-; CHECK-NEXT:            Member: {(100 + (8 * (zext i32 %offset to i64))<nuw><nsw> + %A),+,8}<%loop>
-; CHECK-NEXT:        Group [[GRP18:0x[0-9a-f]+]]:
+; CHECK-NEXT:        Group [[GRP16]]:
 ; CHECK-NEXT:          (Low: (100 + %A) High: (96 + (8 * (zext i32 %offset to i64))<nuw><nsw> + %A))
 ; CHECK-NEXT:            Member: {(100 + %A),+,8}<%loop>
+; CHECK-NEXT:        Group [[GRP17]]:
+; CHECK-NEXT:          (Low: (100 + (8 * (zext i32 %offset to i64))<nuw><nsw> + %A) High: (96 + (16 * (zext i32 %offset to i64))<nuw><nsw> + %A))
+; CHECK-NEXT:            Member: {(100 + (8 * (zext i32 %offset to i64))<nuw><nsw> + %A),+,8}<%loop>
+; CHECK-NEXT:        Group [[GRP18]]:
+; CHECK-NEXT:          (Low: %A High: (4 + %A))
+; CHECK-NEXT:            Member: %A
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:



More information about the llvm-commits mailing list