[llvm] LAA: be more precise on different store sizes (PR #122318)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 9 09:13:46 PST 2025


https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/122318

The HasSameSize checks, which are triggered on different store sizes, in MemDepChecker::isDependent are ad-hoc and imprecise, leading to spurious dependencies and runtime-checks. Identify that the exact scenario in which to bail out is unequal store sizes when dependence distance is zero, and check precisely this condition in
MemDepChecker::getDependenceDistanceAndSize, eliminating all the ad-hoc checks in isDependent and making LoopAccessAnalysis more precise.

>From b4bc97d85aeda73b913ff14733dfd92b21791408 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 9 Jan 2025 16:50:04 +0000
Subject: [PATCH] LAA: be more precise on different store sizes

The HasSameSize checks, which are triggered on different store sizes, in
MemDepChecker::isDependent are ad-hoc and imprecise, leading to spurious
dependencies and runtime-checks. Identify that the exact scenario in
which to bail out is unequal store sizes when dependence distance is
zero, and check precisely this condition in
MemDepChecker::getDependenceDistanceAndSize, eliminating all the ad-hoc
checks in isDependent and making LoopAccessAnalysis more precise.
---
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      | 45 ++++++++-----------
 .../forward-loop-carried.ll                   |  4 --
 ...endence-distance-different-access-sizes.ll | 31 ++++---------
 ...interleave-allocsize-not-equal-typesize.ll | 41 +++++++++++++----
 4 files changed, 61 insertions(+), 60 deletions(-)

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 38e9145826c08e..90a9de8bf1a601 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1990,10 +1990,17 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
   }
 
   uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
-  bool HasSameSize =
-      DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
-  if (!HasSameSize)
-    TypeByteSize = 0;
+
+  // When the distance is zero, we're reading/writing the same memory location:
+  // check that the store sizes are equal. Otherwise, fail with an unknown
+  // dependence for which we should not generate runtime checks.
+  TypeSize AStoreSz = DL.getTypeStoreSizeInBits(ATy),
+           BStoreSz = DL.getTypeStoreSizeInBits(BTy);
+  if (Dist->isZero() && AStoreSz != BStoreSz) {
+    LLVM_DEBUG(
+        dbgs() << "LAA: zero dependence distance but different type sizes\n");
+    return Dependence::Unknown;
+  }
 
   StrideAPtrInt = std::abs(StrideAPtrInt);
   StrideBPtrInt = std::abs(StrideBPtrInt);
@@ -2029,7 +2036,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   auto &[Dist, MaxStride, CommonStride, ShouldRetryWithRuntimeCheck,
          TypeByteSize, AIsWrite, BIsWrite] =
       std::get<DepDistanceStrideAndSizeInfo>(Res);
-  bool HasSameSize = TypeByteSize > 0;
 
   if (isa<SCEVCouldNotCompute>(Dist)) {
     // TODO: Relax requirement that there is a common unscaled stride to retry
@@ -2047,9 +2053,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   // upper bound of the number of iterations), the accesses are independet, i.e.
   // they are far enough appart that accesses won't access the same location
   // across all loop ierations.
-  if (HasSameSize && isSafeDependenceDistance(
-                         DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()),
-                         *Dist, MaxStride, TypeByteSize))
+  if (isSafeDependenceDistance(DL, SE,
+                               *(PSE.getSymbolicMaxBackedgeTakenCount()), *Dist,
+                               MaxStride, TypeByteSize))
     return Dependence::NoDep;
 
   const SCEVConstant *ConstDist = dyn_cast<SCEVConstant>(Dist);
@@ -2060,7 +2066,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
 
     // If the distance between accesses and their strides are known constants,
     // check whether the accesses interlace each other.
-    if (Distance > 0 && CommonStride && CommonStride > 1 && HasSameSize &&
+    if (Distance > 0 && CommonStride && CommonStride > 1 &&
         areStridedAccessesIndependent(Distance, *CommonStride, TypeByteSize)) {
       LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
       return Dependence::NoDep;
@@ -2074,15 +2080,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
 
   // Negative distances are not plausible dependencies.
   if (SE.isKnownNonPositive(Dist)) {
-    if (SE.isKnownNonNegative(Dist)) {
-      if (HasSameSize) {
-        // Write to the same location with the same size.
-        return Dependence::Forward;
-      }
-      LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
-                           "different type sizes\n");
-      return Dependence::Unknown;
-    }
+    if (SE.isKnownNonNegative(Dist))
+      // Write to the same location with the same size.
+      return Dependence::Forward;
 
     bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
     // Check if the first access writes to a location that is read in a later
@@ -2102,8 +2102,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
         FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
         return Dependence::Unknown;
       }
-      if (!HasSameSize ||
-          couldPreventStoreLoadForward(
+      if (couldPreventStoreLoadForward(
               ConstDist->getAPInt().abs().getZExtValue(), TypeByteSize)) {
         LLVM_DEBUG(
             dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
@@ -2134,12 +2133,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
     FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
   }
 
-  if (!HasSameSize) {
-    LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
-                         "different type sizes\n");
-    return Dependence::Unknown;
-  }
-
   if (!CommonStride)
     return Dependence::Unknown;
 
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll
index adfd19923e921c..7837c20f003e24 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll
@@ -70,10 +70,6 @@ define void @forward_different_access_sizes(ptr readnone %end, ptr %start) {
 ; CHECK-NEXT:            store i32 0, ptr %gep.2, align 4 ->
 ; CHECK-NEXT:            %l = load i24, ptr %gep.1, align 1
 ; CHECK-EMPTY:
-; CHECK-NEXT:        Forward:
-; CHECK-NEXT:            store i32 0, ptr %gep.2, align 4 ->
-; CHECK-NEXT:            store i24 %l, ptr %ptr.iv, align 1
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll b/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll
index 08e0bae7f05bac..c43b072b30a8ea 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll
@@ -3,26 +3,13 @@
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
-; TODO: No runtime checks should be needed, as the distance between accesses
-; is large enough to need runtime checks.
 define void @test_distance_positive_independent_via_trip_count(ptr %A) {
 ; CHECK-LABEL: 'test_distance_positive_independent_via_trip_count'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Memory dependences are safe with run-time checks
+; CHECK-NEXT:      Memory dependences are safe
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Check 0:
-; CHECK-NEXT:        Comparing group ([[GRP1:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep.A.400 = getelementptr inbounds i32, ptr %A.400, i64 %iv
-; CHECK-NEXT:        Against group ([[GRP2:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
 ; CHECK-NEXT:      Grouped accesses:
-; CHECK-NEXT:        Group [[GRP1]]:
-; CHECK-NEXT:          (Low: (400 + %A)<nuw> High: (804 + %A))
-; CHECK-NEXT:            Member: {(400 + %A)<nuw>,+,4}<nuw><%loop>
-; CHECK-NEXT:        Group [[GRP2]]:
-; CHECK-NEXT:          (Low: %A High: (101 + %A))
-; CHECK-NEXT:            Member: {%A,+,1}<nuw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -57,15 +44,15 @@ define void @test_distance_positive_backwards(ptr %A) {
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Check 0:
-; CHECK-NEXT:        Comparing group ([[GRP3:0x[0-9a-f]+]]):
+; CHECK-NEXT:        Comparing group ([[GRP1:0x[0-9a-f]+]]):
 ; CHECK-NEXT:          %gep.A.400 = getelementptr inbounds i32, ptr %A.1, i64 %iv
-; CHECK-NEXT:        Against group ([[GRP4:0x[0-9a-f]+]]):
+; CHECK-NEXT:        Against group ([[GRP2:0x[0-9a-f]+]]):
 ; CHECK-NEXT:          %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
 ; CHECK-NEXT:      Grouped accesses:
-; CHECK-NEXT:        Group [[GRP3]]:
+; CHECK-NEXT:        Group [[GRP1]]:
 ; CHECK-NEXT:          (Low: (1 + %A)<nuw> High: (405 + %A))
 ; CHECK-NEXT:            Member: {(1 + %A)<nuw>,+,4}<nuw><%loop>
-; CHECK-NEXT:        Group [[GRP4]]:
+; CHECK-NEXT:        Group [[GRP2]]:
 ; CHECK-NEXT:          (Low: %A High: (101 + %A))
 ; CHECK-NEXT:            Member: {%A,+,1}<nuw><%loop>
 ; CHECK-EMPTY:
@@ -100,15 +87,15 @@ define void @test_distance_positive_via_assume(ptr %A, i64 %off) {
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Check 0:
-; CHECK-NEXT:        Comparing group ([[GRP5:0x[0-9a-f]+]]):
+; CHECK-NEXT:        Comparing group ([[GRP3:0x[0-9a-f]+]]):
 ; CHECK-NEXT:          %gep.A.400 = getelementptr inbounds i32, ptr %A.off, i64 %iv
-; CHECK-NEXT:        Against group ([[GRP6:0x[0-9a-f]+]]):
+; CHECK-NEXT:        Against group ([[GRP4:0x[0-9a-f]+]]):
 ; CHECK-NEXT:          %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
 ; CHECK-NEXT:      Grouped accesses:
-; CHECK-NEXT:        Group [[GRP5]]:
+; CHECK-NEXT:        Group [[GRP3]]:
 ; CHECK-NEXT:          (Low: (%off + %A) High: (404 + %off + %A))
 ; CHECK-NEXT:            Member: {(%off + %A),+,4}<nw><%loop>
-; CHECK-NEXT:        Group [[GRP6]]:
+; CHECK-NEXT:        Group [[GRP4]]:
 ; CHECK-NEXT:          (Low: %A High: (101 + %A))
 ; CHECK-NEXT:            Member: {%A,+,1}<nuw><%loop>
 ; CHECK-EMPTY:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
index bd77f9779b680d..6451aa96b04da4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
@@ -35,10 +35,10 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1
-; CHECK-NEXT:    [[TMP13:%.*]] = load i24, ptr [[TMP9]], align 4, !alias.scope !0
-; CHECK-NEXT:    [[TMP14:%.*]] = load i24, ptr [[TMP10]], align 4, !alias.scope !0
-; CHECK-NEXT:    [[TMP15:%.*]] = load i24, ptr [[TMP11]], align 4, !alias.scope !0
-; CHECK-NEXT:    [[TMP16:%.*]] = load i24, ptr [[TMP12]], align 4, !alias.scope !0
+; CHECK-NEXT:    [[TMP13:%.*]] = load i24, ptr [[TMP9]], align 4, !alias.scope [[META0:![0-9]+]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i24, ptr [[TMP10]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i24, ptr [[TMP11]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i24, ptr [[TMP12]], align 4, !alias.scope [[META0]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i24> poison, i24 [[TMP13]], i32 0
 ; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x i24> [[TMP17]], i24 [[TMP14]], i32 1
 ; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i24> [[TMP18]], i24 [[TMP15]], i32 2
@@ -47,7 +47,7 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    [[TMP22:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[TMP21]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 0
-; CHECK-NEXT:    store <4 x i32> [[TMP22]], ptr [[TMP24]], align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT:    store <4 x i32> [[TMP22]], ptr [[TMP24]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
 ; CHECK-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -96,17 +96,42 @@ exit:
 define void @pr58722_store_interleave_group(ptr %src, ptr %dst) {
 ; CHECK-LABEL: @pr58722_store_interleave_group(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP1]]
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 1
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc i32 [[TMP0]] to i24
+; CHECK-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP1]] to i24
+; CHECK-NEXT:    store i24 [[TMP6]], ptr [[TMP4]], align 4
+; CHECK-NEXT:    store i24 [[TMP7]], ptr [[TMP5]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 5000
+; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[GEP_IV:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i32 [[IV]]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[GEP_IV:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[IV]]
 ; CHECK-NEXT:    store i32 [[IV]], ptr [[GEP_IV]], align 4
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[GEP_IV]], i64 1
 ; CHECK-NEXT:    [[TRUNC_IV:%.*]] = trunc i32 [[IV]] to i24
 ; CHECK-NEXT:    store i24 [[TRUNC_IV]], ptr [[GEP]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 2
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[IV]], 10000
-; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;



More information about the llvm-commits mailing list