[llvm] LAA: generalize strides over unequal type sizes (PR #108088)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 23 02:24:24 PDT 2024


https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/108088

From a25b2b4a92db0815a913b21f25c5d4880ee0d0e5 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 10 Sep 2024 11:54:02 +0100
Subject: [PATCH 1/3] LAA: generalize strides over unequal type sizes

getDependenceDistanceStrideAndSize currently returns a non-zero
TypeByteSize only if the type-sizes of the source and sink are equal.
The non-zero TypeByteSize is then used by isDependent to scale the
strides, the distance between the accesses, and the VF. This restriction
is artificial, as the strides can be scaled by their respective
type-sizes in advance, freeing isDependent of this responsibility, and
removing the ugly special-case of zero-TypeByteSize. The patch also has
the side-effect of fixing the long-standing TODO of requesting
runtime-checks when the strides are unequal.
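
For illustration, consider two accesses with unequal type sizes (a
hypothetical sketch; the function name f and the loop are made up, not
taken from this patch's tests):

    #include <stdint.h>

    // The i64 access has induction stride 2 and alloc size 8, giving a
    // scaled stride of 16 bytes; the i32 access has stride 1 and alloc
    // size 4, giving a scaled stride of 4 bytes.
    void f(int64_t *A64, int32_t *A32, int n) {
      for (int i = 0; i < n; i++)
        A64[2 * i] = (int64_t)A32[i];
    }

With the strides pre-scaled to bytes, isDependent can compare 16
against 4 directly, with no zero-TypeByteSize special case.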

The test impact of this patch is that several false dependencies are
eliminated, and several unknown dependencies now come with
runtime-checks instead.
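
As a sketch of the runtime-check change (again hypothetical, only
mirroring the shape of the loops in non-constant-strides-backward.ll):

    #include <stdint.h>

    // Same element type, unequal strides: 4 vs. 8 bytes after scaling.
    // Previously this was reported as an unknown dependence with no
    // runtime checks; now the two access ranges are disambiguated at
    // runtime.
    void g(int32_t *A, int n) {
      for (int i = 0; i < n; i++)
        A[2 * i] = A[i] + 1;
    }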
---
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      | 143 ++++++++----------
 .../LoopAccessAnalysis/depend_diff_types.ll   |  10 +-
 ...es-safe-dep-due-to-backedge-taken-count.ll |  22 ++-
 .../forward-loop-carried.ll                   |   4 -
 .../non-constant-strides-backward.ll          | 108 ++++++++-----
 ...endence-distance-different-access-sizes.ll |  45 ++----
 ...interleave-allocsize-not-equal-typesize.ll |  49 ++++--
 .../LoopVectorize/memory-dep-remarks.ll       |  30 +---
 8 files changed, 211 insertions(+), 200 deletions(-)

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 980f142f113265..46fbf6c0e0e71f 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1805,8 +1805,7 @@ void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
 ///     }
 static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
                                      const SCEV &MaxBTC, const SCEV &Dist,
-                                     uint64_t MaxStride,
-                                     uint64_t TypeByteSize) {
+                                     uint64_t MaxStride) {
 
   // If we can prove that
   //      (**) |Dist| > MaxBTC * Step
@@ -1825,8 +1824,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
   // will be executed only if LoopCount >= VF, proving distance >= LoopCount
   // also guarantees that distance >= VF.
   //
-  const uint64_t ByteStride = MaxStride * TypeByteSize;
-  const SCEV *Step = SE.getConstant(MaxBTC.getType(), ByteStride);
+  const SCEV *Step = SE.getConstant(MaxBTC.getType(), MaxStride);
   const SCEV *Product = SE.getMulExpr(&MaxBTC, Step);
 
   const SCEV *CastedDist = &Dist;
@@ -1870,9 +1868,7 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
   if (Distance % TypeByteSize)
     return false;
 
-  uint64_t ScaledDist = Distance / TypeByteSize;
-
-  // No dependence if the scaled distance is not multiple of the stride.
+  // No dependence if the distance is not a multiple of the stride.
   // E.g.
   //      for (i = 0; i < 1024 ; i += 4)
   //        A[i+2] = A[i] + 1;
@@ -1888,7 +1884,7 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
   // Two accesses in memory (scaled distance is 4, stride is 3):
   //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
   //     |      |      |      |      | A[4] |      |      | A[7] |      |
-  return ScaledDist % Stride;
+  return Distance % Stride;
 }
 
 std::variant<MemoryDepChecker::Dependence::DepType,
@@ -1927,6 +1923,7 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
   if (StrideAPtr && *StrideAPtr < 0) {
     std::swap(Src, Sink);
     std::swap(AInst, BInst);
+    std::swap(ATy, BTy);
     std::swap(StrideAPtr, StrideBPtr);
   }
 
@@ -1970,31 +1967,51 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
     return MemoryDepChecker::Dependence::IndirectUnsafe;
   }
 
-  int64_t StrideAPtrInt = *StrideAPtr;
-  int64_t StrideBPtrInt = *StrideBPtr;
-  LLVM_DEBUG(dbgs() << "LAA:  Src induction step: " << StrideAPtrInt
-                    << " Sink induction step: " << StrideBPtrInt << "\n");
+  LLVM_DEBUG(dbgs() << "LAA:  Src induction step: " << *StrideAPtr
+                    << " Sink induction step: " << *StrideBPtr << "\n");
+
+  // Note that store size is different from alloc size, which is depedent on
+  // store size. We use the former for checking illegal cases, and the latter
+  // for scaling strides.
+  TypeSize AStoreSz = DL.getTypeStoreSizeInBits(ATy),
+           BStoreSz = DL.getTypeStoreSizeInBits(BTy);
+
+  // When the distance is zero, we're reading/writing the same memory location:
+  // check that the store sizes are equal. Otherwise, fail with an unknown
+  // dependence for which we should not generate runtime checks.
+  if (Dist->isZero() && AStoreSz != BStoreSz)
+    return MemoryDepChecker::Dependence::Unknown;
+
+  // We can't get a uint64_t for the AllocSize if either of the store sizes
+  // is scalable.
+  if (AStoreSz.isScalable() || BStoreSz.isScalable())
+    return MemoryDepChecker::Dependence::Unknown;
+
+  // The TypeByteSize is used to scale Distance and VF. In these contexts, the
+  // only size that matters is the size of the Sink.
+  uint64_t ASz = DL.getTypeAllocSize(ATy),
+           TypeByteSize = DL.getTypeAllocSize(BTy);
+
+  // We scale the strides by the alloc-type sizes, so that a common stride
+  // can be established even when ASz != BSz.
+  int64_t StrideAScaled = *StrideAPtr * ASz;
+  int64_t StrideBScaled = *StrideBPtr * TypeByteSize;
+
   // At least Src or Sink are loop invariant and the other is strided or
   // invariant. We can generate a runtime check to disambiguate the accesses.
-  if (StrideAPtrInt == 0 || StrideBPtrInt == 0)
+  if (!StrideAScaled || !StrideBScaled)
     return MemoryDepChecker::Dependence::Unknown;
 
   // Both Src and Sink have a constant stride, check if they are in the same
   // direction.
-  if ((StrideAPtrInt > 0 && StrideBPtrInt < 0) ||
-      (StrideAPtrInt < 0 && StrideBPtrInt > 0)) {
+  if (StrideAScaled > 0 != StrideBScaled > 0) {
     LLVM_DEBUG(
         dbgs() << "Pointer access with strides in different directions\n");
     return MemoryDepChecker::Dependence::Unknown;
   }
 
-  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
-  bool HasSameSize =
-      DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
-  if (!HasSameSize)
-    TypeByteSize = 0;
-  return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtrInt),
-                                      std::abs(StrideBPtrInt), TypeByteSize,
+  return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAScaled),
+                                      std::abs(StrideBScaled), TypeByteSize,
                                       AIsWrite, BIsWrite);
 }
 
@@ -2012,15 +2029,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
 
   auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] =
       std::get<DepDistanceStrideAndSizeInfo>(Res);
-  bool HasSameSize = TypeByteSize > 0;
 
   std::optional<uint64_t> CommonStride =
       StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt;
   if (isa<SCEVCouldNotCompute>(Dist)) {
-    // TODO: Relax requirement that there is a common stride to retry with
-    // non-constant distance dependencies.
-    FoundNonConstantDistanceDependence |= CommonStride.has_value();
     LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n");
+    FoundNonConstantDistanceDependence = true;
     return Dependence::Unknown;
   }
 
@@ -2033,24 +2047,20 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   // upper bound of the number of iterations), the accesses are independet, i.e.
   // they are far enough appart that accesses won't access the same location
   // across all loop ierations.
-  if (HasSameSize && isSafeDependenceDistance(
-                         DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()),
-                         *Dist, MaxStride, TypeByteSize))
+  if (isSafeDependenceDistance(
+          DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()), *Dist, MaxStride))
     return Dependence::NoDep;
 
-  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
+  const SCEVConstant *ConstDist = dyn_cast<SCEVConstant>(Dist);
 
   // Attempt to prove strided accesses independent.
-  if (C) {
-    const APInt &Val = C->getAPInt();
-    int64_t Distance = Val.getSExtValue();
+  if (ConstDist) {
+    int64_t Distance = std::abs(ConstDist->getAPInt().getSExtValue());
 
     // If the distance between accesses and their strides are known constants,
     // check whether the accesses interlace each other.
-    if (std::abs(Distance) > 0 && CommonStride && *CommonStride > 1 &&
-        HasSameSize &&
-        areStridedAccessesIndependent(std::abs(Distance), *CommonStride,
-                                      TypeByteSize)) {
+    if (Distance > 0 && CommonStride && CommonStride > 1 &&
+        areStridedAccessesIndependent(Distance, *CommonStride, TypeByteSize)) {
       LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
       return Dependence::NoDep;
     }
@@ -2063,15 +2073,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
 
   // Negative distances are not plausible dependencies.
   if (SE.isKnownNonPositive(Dist)) {
-    if (SE.isKnownNonNegative(Dist)) {
-      if (HasSameSize) {
-        // Write to the same location with the same size.
-        return Dependence::Forward;
-      }
-      LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
-                           "different type sizes\n");
-      return Dependence::Unknown;
-    }
+    if (SE.isKnownNonNegative(Dist))
+      // Write to the same location.
+      return Dependence::Forward;
 
     bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
     // Check if the first access writes to a location that is read in a later
@@ -2083,17 +2087,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
     // forward dependency will allow vectorization using any width.
 
     if (IsTrueDataDependence && EnableForwardingConflictDetection) {
-      if (!C) {
-        // TODO: FoundNonConstantDistanceDependence is used as a necessary
-        // condition to consider retrying with runtime checks. Historically, we
-        // did not set it when strides were different but there is no inherent
-        // reason to.
-        FoundNonConstantDistanceDependence |= CommonStride.has_value();
+      if (!ConstDist) {
+        FoundNonConstantDistanceDependence = true;
         return Dependence::Unknown;
       }
-      if (!HasSameSize ||
-          couldPreventStoreLoadForward(C->getAPInt().abs().getZExtValue(),
-                                       TypeByteSize)) {
+      if (couldPreventStoreLoadForward(
+              ConstDist->getAPInt().abs().getZExtValue(), TypeByteSize)) {
         LLVM_DEBUG(
             dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
         return Dependence::ForwardButPreventsForwarding;
@@ -2107,27 +2106,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue();
   // Below we only handle strictly positive distances.
   if (MinDistance <= 0) {
-    FoundNonConstantDistanceDependence |= CommonStride.has_value();
+    FoundNonConstantDistanceDependence = true;
     return Dependence::Unknown;
   }
 
-  if (!isa<SCEVConstant>(Dist)) {
-    // Previously this case would be treated as Unknown, possibly setting
-    // FoundNonConstantDistanceDependence to force re-trying with runtime
-    // checks. Until the TODO below is addressed, set it here to preserve
-    // original behavior w.r.t. re-trying with runtime checks.
-    // TODO: FoundNonConstantDistanceDependence is used as a necessary
-    // condition to consider retrying with runtime checks. Historically, we
-    // did not set it when strides were different but there is no inherent
-    // reason to.
-    FoundNonConstantDistanceDependence |= CommonStride.has_value();
-  }
-
-  if (!HasSameSize) {
-    LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
-                         "different type sizes\n");
-    return Dependence::Unknown;
-  }
+  if (!ConstDist)
+    FoundNonConstantDistanceDependence = true;
 
   if (!CommonStride)
     return Dependence::Unknown;
@@ -2142,8 +2126,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
 
   // It's not vectorizable if the distance is smaller than the minimum distance
   // needed for a vectroized/unrolled version. Vectorizing one iteration in
-  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
-  // TypeByteSize (No need to plus the last gap distance).
+  // front needs CommonStride. Vectorizing the last iteration needs TypeByteSize
+  // (no need to add the last gap distance).
   //
   // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
   //      foo(int *A) {
@@ -2170,10 +2154,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   // We know that Dist is positive, but it may not be constant. Use the signed
   // minimum for computations below, as this ensures we compute the closest
   // possible dependence distance.
-  uint64_t MinDistanceNeeded =
-      TypeByteSize * *CommonStride * (MinNumIter - 1) + TypeByteSize;
+  uint64_t MinDistanceNeeded = *CommonStride * (MinNumIter - 1) + TypeByteSize;
   if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) {
-    if (!isa<SCEVConstant>(Dist)) {
+    if (!ConstDist) {
       // For non-constant distances, we checked the lower bound of the
       // dependence distance and the distance may be larger at runtime (and safe
       // for vectorization). Classify it as Unknown, so we re-try with runtime
@@ -2228,12 +2211,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
 
   // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
   // since there is a backwards dependency.
-  uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * *CommonStride);
+  uint64_t MaxVF = MinDepDistBytes / *CommonStride;
   LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
                     << " with max VF = " << MaxVF << '\n');
 
   uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
-  if (!isa<SCEVConstant>(Dist) && MaxVFInBits < MaxTargetVectorWidthInBits) {
+  if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {
     // For non-constant distances, we checked the lower bound of the dependence
     // distance and the distance may be larger at runtime (and safe for
     // vectorization). Classify it as Unknown, so we re-try with runtime checks.
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
index 81d8b01fe7fb72..809b15b2004952 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
@@ -130,16 +130,8 @@ define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
 ; CHECK-LABEL: 'neg_dist_dep_type_size_equivalence'
 ; CHECK-NEXT:    loop:
 ; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:  Backward loop carried data dependence that prevents store-to-load forwarding.
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Unknown:
-; CHECK-NEXT:            %ld.f64 = load double, ptr %gep.iv, align 8 ->
-; CHECK-NEXT:            store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
-; CHECK-EMPTY:
-; CHECK-NEXT:        Unknown:
-; CHECK-NEXT:            %ld.i64 = load i64, ptr %gep.iv, align 8 ->
-; CHECK-NEXT:            store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
-; CHECK-EMPTY:
 ; CHECK-NEXT:        BackwardVectorizableButPreventsForwarding:
 ; CHECK-NEXT:            %ld.f64 = load double, ptr %gep.iv, align 8 ->
 ; CHECK-NEXT:            store double %val, ptr %gep.iv.101.i64, align 8
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll b/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll
index 8c7df4bdf5a5a8..2fd4d5ae06094e 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll
@@ -106,18 +106,24 @@ exit:
   ret void
 }
 
-define void @unknown_dep_not_known_safe_due_to_backedge_taken_count(ptr %A) {
-; CHECK-LABEL: 'unknown_dep_not_known_safe_due_to_backedge_taken_count'
+define void @unknown_dep_safe_with_rtchecks(ptr %A) {
+; CHECK-LABEL: 'unknown_dep_safe_with_rtchecks'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Unknown:
-; CHECK-NEXT:            %l = load i32, ptr %gep, align 4 ->
-; CHECK-NEXT:            store i32 %add, ptr %gep.mul.2, align 4
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group ([[GRP1:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.mul.2 = getelementptr inbounds i32, ptr %A.510, i64 %iv.mul.2
+; CHECK-NEXT:        Against group ([[GRP2:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group [[GRP1]]:
+; CHECK-NEXT:          (Low: (2040 + %A)<nuw> High: (4084 + %A))
+; CHECK-NEXT:            Member: {(2040 + %A)<nuw>,+,8}<nuw><%loop>
+; CHECK-NEXT:        Group [[GRP2]]:
+; CHECK-NEXT:          (Low: %A High: (1024 + %A))
+; CHECK-NEXT:            Member: {%A,+,4}<nuw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll
index adfd19923e921c..7837c20f003e24 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll
@@ -70,10 +70,6 @@ define void @forward_different_access_sizes(ptr readnone %end, ptr %start) {
 ; CHECK-NEXT:            store i32 0, ptr %gep.2, align 4 ->
 ; CHECK-NEXT:            %l = load i24, ptr %gep.1, align 1
 ; CHECK-EMPTY:
-; CHECK-NEXT:        Forward:
-; CHECK-NEXT:            store i32 0, ptr %gep.2, align 4 ->
-; CHECK-NEXT:            store i24 %l, ptr %ptr.iv, align 1
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll
index 416742a94e0d36..d0e717c22cece5 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll
@@ -8,15 +8,21 @@ declare void @llvm.assume(i1)
 define void @different_non_constant_strides_known_backward(ptr %A) {
 ; CHECK-LABEL: 'different_non_constant_strides_known_backward'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Unknown:
-; CHECK-NEXT:            %l = load i32, ptr %gep, align 4 ->
-; CHECK-NEXT:            store i32 %add, ptr %gep.mul.2, align 4
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group ([[GRP1:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.mul.2 = getelementptr inbounds i32, ptr %A, i64 %iv.mul.2
+; CHECK-NEXT:        Against group ([[GRP2:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group [[GRP1]]:
+; CHECK-NEXT:          (Low: %A High: (2044 + %A))
+; CHECK-NEXT:            Member: {%A,+,8}<nuw><%loop>
+; CHECK-NEXT:        Group [[GRP2]]:
+; CHECK-NEXT:          (Low: %A High: (1024 + %A))
+; CHECK-NEXT:            Member: {%A,+,4}<nuw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -45,15 +51,21 @@ exit:
 define void @different_non_constant_strides_known_backward_distance_larger_than_trip_count(ptr %A) {
 ; CHECK-LABEL: 'different_non_constant_strides_known_backward_distance_larger_than_trip_count'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Unknown:
-; CHECK-NEXT:            %l = load i32, ptr %gep, align 4 ->
-; CHECK-NEXT:            store i32 %add, ptr %gep.mul.2, align 4
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group ([[GRP3:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.mul.2 = getelementptr inbounds i32, ptr %A.1024, i64 %iv.mul.2
+; CHECK-NEXT:        Against group ([[GRP4:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group [[GRP3]]:
+; CHECK-NEXT:          (Low: (1024 + %A)<nuw> High: (3068 + %A))
+; CHECK-NEXT:            Member: {(1024 + %A)<nuw>,+,8}<nuw><%loop>
+; CHECK-NEXT:        Group [[GRP4]]:
+; CHECK-NEXT:          (Low: %A High: (1024 + %A)<nuw>)
+; CHECK-NEXT:            Member: {%A,+,4}<nuw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -83,15 +95,21 @@ exit:
 define void @different_non_constant_strides_known_backward_min_distance_16(ptr %A) {
 ; CHECK-LABEL: 'different_non_constant_strides_known_backward_min_distance_16'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Unknown:
-; CHECK-NEXT:            %l = load i32, ptr %gep, align 4 ->
-; CHECK-NEXT:            store i32 %add, ptr %gep.mul.2, align 4
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group ([[GRP5:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.mul.2 = getelementptr inbounds i32, ptr %A.16, i64 %iv.mul.2
+; CHECK-NEXT:        Against group ([[GRP6:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group [[GRP5]]:
+; CHECK-NEXT:          (Low: (16 + %A)<nuw> High: (2060 + %A))
+; CHECK-NEXT:            Member: {(16 + %A)<nuw>,+,8}<nuw><%loop>
+; CHECK-NEXT:        Group [[GRP6]]:
+; CHECK-NEXT:          (Low: %A High: (1024 + %A))
+; CHECK-NEXT:            Member: {%A,+,4}<nuw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -121,15 +139,21 @@ exit:
 define void @different_non_constant_strides_known_backward_min_distance_15(ptr %A) {
 ; CHECK-LABEL: 'different_non_constant_strides_known_backward_min_distance_15'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Unknown:
-; CHECK-NEXT:            %l = load i32, ptr %gep, align 4 ->
-; CHECK-NEXT:            store i32 %add, ptr %gep.mul.2, align 4
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group ([[GRP7:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.mul.2 = getelementptr inbounds i32, ptr %A.15, i64 %iv.mul.2
+; CHECK-NEXT:        Against group ([[GRP8:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group [[GRP7]]:
+; CHECK-NEXT:          (Low: (15 + %A)<nuw> High: (2059 + %A))
+; CHECK-NEXT:            Member: {(15 + %A)<nuw>,+,8}<nuw><%loop>
+; CHECK-NEXT:        Group [[GRP8]]:
+; CHECK-NEXT:          (Low: %A High: (1024 + %A))
+; CHECK-NEXT:            Member: {%A,+,4}<nuw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -159,15 +183,21 @@ exit:
 define void @different_non_constant_strides_known_backward_min_distance_8(ptr %A) {
 ; CHECK-LABEL: 'different_non_constant_strides_known_backward_min_distance_8'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Unknown:
-; CHECK-NEXT:            %l = load i32, ptr %gep, align 4 ->
-; CHECK-NEXT:            store i32 %add, ptr %gep.mul.2, align 4
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group ([[GRP9:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.mul.2 = getelementptr inbounds i32, ptr %A.8, i64 %iv.mul.2
+; CHECK-NEXT:        Against group ([[GRP10:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group [[GRP9]]:
+; CHECK-NEXT:          (Low: (8 + %A)<nuw> High: (2052 + %A))
+; CHECK-NEXT:            Member: {(8 + %A)<nuw>,+,8}<nuw><%loop>
+; CHECK-NEXT:        Group [[GRP10]]:
+; CHECK-NEXT:          (Low: %A High: (1024 + %A))
+; CHECK-NEXT:            Member: {%A,+,4}<nuw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -197,15 +227,21 @@ exit:
 define void @different_non_constant_strides_known_backward_min_distance_3(ptr %A) {
 ; CHECK-LABEL: 'different_non_constant_strides_known_backward_min_distance_3'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Unknown:
-; CHECK-NEXT:            %l = load i32, ptr %gep, align 4 ->
-; CHECK-NEXT:            store i32 %add, ptr %gep.mul.2, align 4
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group ([[GRP11:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.mul.2 = getelementptr inbounds i32, ptr %A.3, i64 %iv.mul.2
+; CHECK-NEXT:        Against group ([[GRP12:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep = getelementptr inbounds i32, ptr %A, i64 %iv
 ; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group [[GRP11]]:
+; CHECK-NEXT:          (Low: (3 + %A)<nuw> High: (2047 + %A))
+; CHECK-NEXT:            Member: {(3 + %A)<nuw>,+,8}<nuw><%loop>
+; CHECK-NEXT:        Group [[GRP12]]:
+; CHECK-NEXT:          (Low: %A High: (1024 + %A))
+; CHECK-NEXT:            Member: {%A,+,4}<nuw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll b/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll
index 08e0bae7f05bac..ffd4619fd88dc9 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll
@@ -3,26 +3,13 @@
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
-; TODO: No runtime checks should be needed, as the distance between accesses
-; is large enough to need runtime checks.
 define void @test_distance_positive_independent_via_trip_count(ptr %A) {
 ; CHECK-LABEL: 'test_distance_positive_independent_via_trip_count'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Memory dependences are safe with run-time checks
+; CHECK-NEXT:      Memory dependences are safe
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Check 0:
-; CHECK-NEXT:        Comparing group ([[GRP1:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep.A.400 = getelementptr inbounds i32, ptr %A.400, i64 %iv
-; CHECK-NEXT:        Against group ([[GRP2:0x[0-9a-f]+]]):
-; CHECK-NEXT:          %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
 ; CHECK-NEXT:      Grouped accesses:
-; CHECK-NEXT:        Group [[GRP1]]:
-; CHECK-NEXT:          (Low: (400 + %A)<nuw> High: (804 + %A))
-; CHECK-NEXT:            Member: {(400 + %A)<nuw>,+,4}<nuw><%loop>
-; CHECK-NEXT:        Group [[GRP2]]:
-; CHECK-NEXT:          (Low: %A High: (101 + %A))
-; CHECK-NEXT:            Member: {%A,+,1}<nuw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -41,7 +28,7 @@ loop:
   %ext = zext i8 %l to i32
   store i32 %ext, ptr %gep.A.400, align 4
   %iv.next = add nuw nsw i64 %iv, 1
-  %ec = icmp eq i64 %iv, 100
+  %ec = icmp eq i64 %iv.next, 100
   br i1 %ec, label %exit, label %loop
 
 exit:
@@ -57,16 +44,16 @@ define void @test_distance_positive_backwards(ptr %A) {
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Check 0:
-; CHECK-NEXT:        Comparing group ([[GRP3:0x[0-9a-f]+]]):
+; CHECK-NEXT:        Comparing group ([[GRP1:0x[0-9a-f]+]]):
 ; CHECK-NEXT:          %gep.A.400 = getelementptr inbounds i32, ptr %A.1, i64 %iv
-; CHECK-NEXT:        Against group ([[GRP4:0x[0-9a-f]+]]):
+; CHECK-NEXT:        Against group ([[GRP2:0x[0-9a-f]+]]):
 ; CHECK-NEXT:          %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
 ; CHECK-NEXT:      Grouped accesses:
-; CHECK-NEXT:        Group [[GRP3]]:
-; CHECK-NEXT:          (Low: (1 + %A)<nuw> High: (405 + %A))
+; CHECK-NEXT:        Group [[GRP1]]:
+; CHECK-NEXT:          (Low: (1 + %A)<nuw> High: (401 + %A))
 ; CHECK-NEXT:            Member: {(1 + %A)<nuw>,+,4}<nuw><%loop>
-; CHECK-NEXT:        Group [[GRP4]]:
-; CHECK-NEXT:          (Low: %A High: (101 + %A))
+; CHECK-NEXT:        Group [[GRP2]]:
+; CHECK-NEXT:          (Low: %A High: (100 + %A))
 ; CHECK-NEXT:            Member: {%A,+,1}<nuw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
@@ -86,7 +73,7 @@ loop:
   %ext = zext i8 %l to i32
   store i32 %ext, ptr %gep.A.400, align 4
   %iv.next = add nuw nsw i64 %iv, 1
-  %ec = icmp eq i64 %iv, 100
+  %ec = icmp eq i64 %iv.next, 100
   br i1 %ec, label %exit, label %loop
 
 exit:
@@ -100,16 +87,16 @@ define void @test_distance_positive_via_assume(ptr %A, i64 %off) {
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Check 0:
-; CHECK-NEXT:        Comparing group ([[GRP5:0x[0-9a-f]+]]):
+; CHECK-NEXT:        Comparing group ([[GRP3:0x[0-9a-f]+]]):
 ; CHECK-NEXT:          %gep.A.400 = getelementptr inbounds i32, ptr %A.off, i64 %iv
-; CHECK-NEXT:        Against group ([[GRP6:0x[0-9a-f]+]]):
+; CHECK-NEXT:        Against group ([[GRP4:0x[0-9a-f]+]]):
 ; CHECK-NEXT:          %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
 ; CHECK-NEXT:      Grouped accesses:
-; CHECK-NEXT:        Group [[GRP5]]:
-; CHECK-NEXT:          (Low: (%off + %A) High: (404 + %off + %A))
+; CHECK-NEXT:        Group [[GRP3]]:
+; CHECK-NEXT:          (Low: (%off + %A) High: (400 + %off + %A))
 ; CHECK-NEXT:            Member: {(%off + %A),+,4}<nw><%loop>
-; CHECK-NEXT:        Group [[GRP6]]:
-; CHECK-NEXT:          (Low: %A High: (101 + %A))
+; CHECK-NEXT:        Group [[GRP4]]:
+; CHECK-NEXT:          (Low: %A High: (100 + %A))
 ; CHECK-NEXT:            Member: {%A,+,1}<nuw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
@@ -131,7 +118,7 @@ loop:
   %ext = zext i8 %l to i32
   store i32 %ext, ptr %gep.A.400, align 4
   %iv.next = add nuw nsw i64 %iv, 1
-  %ec = icmp eq i64 %iv, 100
+  %ec = icmp eq i64 %iv.next, 100
   br i1 %ec, label %exit, label %loop
 
 exit:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
index 014620487c202c..7215f2422ccf29 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
@@ -11,10 +11,10 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK:       vector.memcheck:
-; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 40004
-; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 80007
-; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP1]]
-; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[UGLYGEP]]
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 40004
+; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 80007
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
 ; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
@@ -36,10 +36,10 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1
-; CHECK-NEXT:    [[TMP13:%.*]] = load i24, ptr [[TMP9]], align 4, !alias.scope !0
-; CHECK-NEXT:    [[TMP14:%.*]] = load i24, ptr [[TMP10]], align 4, !alias.scope !0
-; CHECK-NEXT:    [[TMP15:%.*]] = load i24, ptr [[TMP11]], align 4, !alias.scope !0
-; CHECK-NEXT:    [[TMP16:%.*]] = load i24, ptr [[TMP12]], align 4, !alias.scope !0
+; CHECK-NEXT:    [[TMP13:%.*]] = load i24, ptr [[TMP9]], align 4, !alias.scope [[META0:![0-9]+]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i24, ptr [[TMP10]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i24, ptr [[TMP11]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i24, ptr [[TMP12]], align 4, !alias.scope [[META0]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i24> poison, i24 [[TMP13]], i32 0
 ; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x i24> [[TMP17]], i24 [[TMP14]], i32 1
 ; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i24> [[TMP18]], i24 [[TMP15]], i32 2
@@ -48,7 +48,7 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    [[TMP22:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[TMP21]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 0
-; CHECK-NEXT:    store <4 x i32> [[TMP22]], ptr [[TMP24]], align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT:    store <4 x i32> [[TMP22]], ptr [[TMP24]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
 ; CHECK-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -97,17 +97,42 @@ exit:
 define void @pr58722_store_interleave_group(ptr %src, ptr %dst) {
 ; CHECK-LABEL: @pr58722_store_interleave_group(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP1]]
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP2]], align 4
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 1
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc i32 [[TMP0]] to i24
+; CHECK-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP1]] to i24
+; CHECK-NEXT:    store i24 [[TMP6]], ptr [[TMP4]], align 4
+; CHECK-NEXT:    store i24 [[TMP7]], ptr [[TMP5]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 5000
+; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[GEP_IV:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i32 [[IV]]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[GEP_IV:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[IV]]
 ; CHECK-NEXT:    store i32 [[IV]], ptr [[GEP_IV]], align 4
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[GEP_IV]], i64 1
 ; CHECK-NEXT:    [[TRUNC_IV:%.*]] = trunc i32 [[IV]] to i24
 ; CHECK-NEXT:    store i24 [[TRUNC_IV]], ptr [[GEP]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 2
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[IV]], 10000
-; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll b/llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll
index d96d85512621c6..b7d36201cf7711 100644
--- a/llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll
+++ b/llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll
@@ -259,19 +259,18 @@ for.body:                                         ; preds = %entry, %for.body
     ret void
 }
 
-; // g) Dependence::Unknown
-; // Different stride lengths
-; void test_unknown_dep(int n, int* A) {
+; // g) Dependence::NoDep
+; // Different stride lengths whose accesses don't overlap.
+; void test_no_overlap(int n, int* A) {
 ;   for(int i=0; i < n; ++i) {
 ;       A[(i+1)*4] = 10;
 ;       A[i] = 100;
 ;   }
 ; }
 
-; CHECK: remark: source.c:83:7: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK: Unknown data dependence. Memory location is the same as accessed at source.c:82:7
+; CHECK-NOT: remark: source.c:{{[0-9]+}}:{{[0-9]+}}:
 
-define void @test_unknown_dep(i64 %n, ptr nocapture %A) !dbg !214 {
+define void @test_no_overlap(i64 %n, ptr nocapture %A) !dbg !214 {
 entry:
   %cmp8 = icmp sgt i64 %n, 0
   br i1 %cmp8, label %for.body, label %for.cond.cleanup
@@ -368,25 +367,12 @@ for.body:                                         ; preds = %entry, %for.body
 ; YAML-NEXT: Args:
 ; YAML-NEXT:   - String:          loop not vectorized
 ; YAML-NEXT: ...
-; YAML-NEXT: --- !Analysis
-; YAML-NEXT: Pass:            loop-vectorize
-; YAML-NEXT: Name:            UnsafeDep
-; YAML-NEXT: DebugLoc:        { File: source.c, Line: 83, Column: 7 }
-; YAML-NEXT: Function:        test_unknown_dep
-; YAML-NEXT: Args:
-; YAML-NEXT:   - String:          'loop not vectorized: '
-; YAML-NEXT:   - String:          'unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop'
-; YAML-NEXT:   - String:          "\nUnknown data dependence."
-; YAML-NEXT:   - String:          ' Memory location is the same as accessed at '
-; YAML-NEXT:   - Location:        'source.c:82:7'
-; YAML-NEXT:     DebugLoc:        { File: source.c, Line: 82, Column: 7 }
-; YAML-NEXT: ...
 ; YAML-NEXT: --- !Missed
 ; YAML-NEXT: Pass:            loop-vectorize
-; YAML-NEXT: Name:            MissedDetails
-; YAML-NEXT: Function:        test_unknown_dep
+; YAML-NEXT: Name:            VectorizationNotBeneficial
+; YAML-NEXT: Function:        test_no_overlap
 ; YAML-NEXT: Args:
-; YAML-NEXT:   - String:          loop not vectorized
+; YAML-NEXT:   - String:          the cost-model indicates that vectorization is not beneficial
 ; YAML-NEXT: ...
 
 

From 995f9104ee384d024aad546ff2743cdbe0531dd2 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 11 Sep 2024 09:45:18 +0100
Subject: [PATCH 2/3] LDist: update test

---
 .../scev-inserted-runtime-check.ll            | 164 +++---------------
 1 file changed, 28 insertions(+), 136 deletions(-)

diff --git a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
index 4e75699c91773f..5494219d0d5a2e 100644
--- a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
+++ b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
@@ -7,84 +7,30 @@
 define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr noalias %e, i64 %N) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY_LVER_CHECK:%.*]]
-; CHECK:       for.body.lver.check:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT:    [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
-; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[A]]
-; CHECK-NEXT:    [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
-; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]]
-; CHECK-NEXT:    br i1 [[TMP6]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
-; CHECK:       for.body.ph.lver.orig:
-; CHECK-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
-; CHECK:       for.body.lver.orig:
-; CHECK-NEXT:    [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[ADD_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
-; CHECK-NEXT:    [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
-; CHECK-NEXT:    [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
-; CHECK-NEXT:    [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64
-; CHECK-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]]
-; CHECK-NEXT:    [[LOADA_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXA_LVER_ORIG]], align 4
-; CHECK-NEXT:    [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
-; CHECK-NEXT:    [[LOADB_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXB_LVER_ORIG]], align 4
-; CHECK-NEXT:    [[MULA_LVER_ORIG:%.*]] = mul i32 [[LOADB_LVER_ORIG]], [[LOADA_LVER_ORIG]]
-; CHECK-NEXT:    [[ADD_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1
-; CHECK-NEXT:    [[INC1_LVER_ORIG]] = add i32 [[IND1_LVER_ORIG]], 1
-; CHECK-NEXT:    [[ARRAYIDXA_PLUS_4_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD_LVER_ORIG]]
-; CHECK-NEXT:    store i32 [[MULA_LVER_ORIG]], ptr [[ARRAYIDXA_PLUS_4_LVER_ORIG]], align 4
-; CHECK-NEXT:    [[ARRAYIDXD_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[D:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
-; CHECK-NEXT:    [[LOADD_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXD_LVER_ORIG]], align 4
-; CHECK-NEXT:    [[ARRAYIDXE_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[E:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
-; CHECK-NEXT:    [[LOADE_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXE_LVER_ORIG]], align 4
-; CHECK-NEXT:    [[MULC_LVER_ORIG:%.*]] = mul i32 [[LOADD_LVER_ORIG]], [[LOADE_LVER_ORIG]]
-; CHECK-NEXT:    [[ARRAYIDXC_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
-; CHECK-NEXT:    store i32 [[MULC_LVER_ORIG]], ptr [[ARRAYIDXC_LVER_ORIG]], align 4
-; CHECK-NEXT:    [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[ADD_LVER_ORIG]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]]
-; CHECK:       for.body.ph.ldist1:
-; CHECK-NEXT:    br label [[FOR_BODY_LDIST1:%.*]]
-; CHECK:       for.body.ldist1:
-; CHECK-NEXT:    [[IND_LDIST1:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[ADD_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ]
-; CHECK-NEXT:    [[IND1_LDIST1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[INC1_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ]
-; CHECK-NEXT:    [[MUL_LDIST1:%.*]] = mul i32 [[IND1_LDIST1]], 2
-; CHECK-NEXT:    [[MUL_EXT_LDIST1:%.*]] = zext i32 [[MUL_LDIST1]] to i64
-; CHECK-NEXT:    [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL_EXT_LDIST1]]
-; CHECK-NEXT:    [[LOADA_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXA_LDIST1]], align 4, !alias.scope !0
-; CHECK-NEXT:    [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[MUL_EXT_LDIST1]]
-; CHECK-NEXT:    [[LOADB_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXB_LDIST1]], align 4
-; CHECK-NEXT:    [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]]
-; CHECK-NEXT:    [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1
-; CHECK-NEXT:    [[INC1_LDIST1]] = add i32 [[IND1_LDIST1]], 1
-; CHECK-NEXT:    [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD_LDIST1]]
-; CHECK-NEXT:    store i32 [[MULA_LDIST1]], ptr [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope !3
-; CHECK-NEXT:    [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]]
-; CHECK:       for.body.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[IND1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IND:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IND1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[IND1]], 2
 ; CHECK-NEXT:    [[MUL_EXT:%.*]] = zext i32 [[MUL]] to i64
+; CHECK-NEXT:    [[ARRAYIDXA:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[MUL_EXT]]
+; CHECK-NEXT:    [[LOADA:%.*]] = load i32, ptr [[ARRAYIDXA]], align 4
+; CHECK-NEXT:    [[ARRAYIDXB:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[MUL_EXT]]
+; CHECK-NEXT:    [[LOADB:%.*]] = load i32, ptr [[ARRAYIDXB]], align 4
+; CHECK-NEXT:    [[MULA:%.*]] = mul i32 [[LOADB]], [[LOADA]]
 ; CHECK-NEXT:    [[ADD]] = add nuw nsw i64 [[IND]], 1
 ; CHECK-NEXT:    [[INC1]] = add i32 [[IND1]], 1
-; CHECK-NEXT:    [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[MUL_EXT]]
+; CHECK-NEXT:    [[ARRAYIDXA_PLUS_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD]]
+; CHECK-NEXT:    store i32 [[MULA]], ptr [[ARRAYIDXA_PLUS_4]], align 4
+; CHECK-NEXT:    [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, ptr [[D:%.*]], i64 [[MUL_EXT]]
 ; CHECK-NEXT:    [[LOADD:%.*]] = load i32, ptr [[ARRAYIDXD]], align 4
-; CHECK-NEXT:    [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, ptr [[E]], i64 [[MUL_EXT]]
+; CHECK-NEXT:    [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, ptr [[E:%.*]], i64 [[MUL_EXT]]
 ; CHECK-NEXT:    [[LOADE:%.*]] = load i32, ptr [[ARRAYIDXE]], align 4
 ; CHECK-NEXT:    [[MULC:%.*]] = mul i32 [[LOADD]], [[LOADE]]
-; CHECK-NEXT:    [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[MUL_EXT]]
+; CHECK-NEXT:    [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[MUL_EXT]]
 ; CHECK-NEXT:    store i32 [[MULC]], ptr [[ARRAYIDXC]], align 4
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
-; CHECK:       for.end.loopexit:
-; CHECK-NEXT:    br label [[FOR_END:%.*]]
-; CHECK:       for.end.loopexit2:
-; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
 ;
@@ -140,84 +86,30 @@ define void @f_with_offset(ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr n
 ; CHECK-NEXT:    [[A_INTPTR:%.*]] = ptrtoint ptr @global_a to i64
 ; CHECK-NEXT:    call void @use64(i64 [[A_INTPTR]])
 ; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, ptr @global_a, i32 42
-; CHECK-NEXT:    br label [[FOR_BODY_LVER_CHECK:%.*]]
-; CHECK:       for.body.lver.check:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; CHECK-NEXT:    [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
-; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[A]]
-; CHECK-NEXT:    [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
-; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]]
-; CHECK-NEXT:    br i1 [[TMP6]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
-; CHECK:       for.body.ph.lver.orig:
-; CHECK-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
-; CHECK:       for.body.lver.orig:
-; CHECK-NEXT:    [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[ADD_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
-; CHECK-NEXT:    [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
-; CHECK-NEXT:    [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
-; CHECK-NEXT:    [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64
-; CHECK-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]]
-; CHECK-NEXT:    [[LOADA_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXA_LVER_ORIG]], align 4
-; CHECK-NEXT:    [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
-; CHECK-NEXT:    [[LOADB_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXB_LVER_ORIG]], align 4
-; CHECK-NEXT:    [[MULA_LVER_ORIG:%.*]] = mul i32 [[LOADB_LVER_ORIG]], [[LOADA_LVER_ORIG]]
-; CHECK-NEXT:    [[ADD_LVER_ORIG]] = add nuw nsw i64 [[IND_LVER_ORIG]], 1
-; CHECK-NEXT:    [[INC1_LVER_ORIG]] = add i32 [[IND1_LVER_ORIG]], 1
-; CHECK-NEXT:    [[ARRAYIDXA_PLUS_4_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD_LVER_ORIG]]
-; CHECK-NEXT:    store i32 [[MULA_LVER_ORIG]], ptr [[ARRAYIDXA_PLUS_4_LVER_ORIG]], align 4
-; CHECK-NEXT:    [[ARRAYIDXD_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[D:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
-; CHECK-NEXT:    [[LOADD_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXD_LVER_ORIG]], align 4
-; CHECK-NEXT:    [[ARRAYIDXE_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[E:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
-; CHECK-NEXT:    [[LOADE_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXE_LVER_ORIG]], align 4
-; CHECK-NEXT:    [[MULC_LVER_ORIG:%.*]] = mul i32 [[LOADD_LVER_ORIG]], [[LOADE_LVER_ORIG]]
-; CHECK-NEXT:    [[ARRAYIDXC_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
-; CHECK-NEXT:    store i32 [[MULC_LVER_ORIG]], ptr [[ARRAYIDXC_LVER_ORIG]], align 4
-; CHECK-NEXT:    [[EXITCOND_LVER_ORIG:%.*]] = icmp eq i64 [[ADD_LVER_ORIG]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_LVER_ORIG]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_LVER_ORIG]]
-; CHECK:       for.body.ph.ldist1:
-; CHECK-NEXT:    br label [[FOR_BODY_LDIST1:%.*]]
-; CHECK:       for.body.ldist1:
-; CHECK-NEXT:    [[IND_LDIST1:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[ADD_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ]
-; CHECK-NEXT:    [[IND1_LDIST1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LDIST1]] ], [ [[INC1_LDIST1:%.*]], [[FOR_BODY_LDIST1]] ]
-; CHECK-NEXT:    [[MUL_LDIST1:%.*]] = mul i32 [[IND1_LDIST1]], 2
-; CHECK-NEXT:    [[MUL_EXT_LDIST1:%.*]] = zext i32 [[MUL_LDIST1]] to i64
-; CHECK-NEXT:    [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL_EXT_LDIST1]]
-; CHECK-NEXT:    [[LOADA_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXA_LDIST1]], align 4, !alias.scope !5
-; CHECK-NEXT:    [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[MUL_EXT_LDIST1]]
-; CHECK-NEXT:    [[LOADB_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXB_LDIST1]], align 4
-; CHECK-NEXT:    [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]]
-; CHECK-NEXT:    [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1
-; CHECK-NEXT:    [[INC1_LDIST1]] = add i32 [[IND1_LDIST1]], 1
-; CHECK-NEXT:    [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD_LDIST1]]
-; CHECK-NEXT:    store i32 [[MULA_LDIST1]], ptr [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope !8
-; CHECK-NEXT:    [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]]
-; CHECK:       for.body.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[IND:%.*]] = phi i64 [ 0, [[FOR_BODY_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[IND1:%.*]] = phi i32 [ 0, [[FOR_BODY_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IND:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IND1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[IND1]], 2
 ; CHECK-NEXT:    [[MUL_EXT:%.*]] = zext i32 [[MUL]] to i64
+; CHECK-NEXT:    [[ARRAYIDXA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL_EXT]]
+; CHECK-NEXT:    [[LOADA:%.*]] = load i32, ptr [[ARRAYIDXA]], align 4
+; CHECK-NEXT:    [[ARRAYIDXB:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[MUL_EXT]]
+; CHECK-NEXT:    [[LOADB:%.*]] = load i32, ptr [[ARRAYIDXB]], align 4
+; CHECK-NEXT:    [[MULA:%.*]] = mul i32 [[LOADB]], [[LOADA]]
 ; CHECK-NEXT:    [[ADD]] = add nuw nsw i64 [[IND]], 1
 ; CHECK-NEXT:    [[INC1]] = add i32 [[IND1]], 1
-; CHECK-NEXT:    [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[MUL_EXT]]
+; CHECK-NEXT:    [[ARRAYIDXA_PLUS_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD]]
+; CHECK-NEXT:    store i32 [[MULA]], ptr [[ARRAYIDXA_PLUS_4]], align 4
+; CHECK-NEXT:    [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, ptr [[D:%.*]], i64 [[MUL_EXT]]
 ; CHECK-NEXT:    [[LOADD:%.*]] = load i32, ptr [[ARRAYIDXD]], align 4
-; CHECK-NEXT:    [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, ptr [[E]], i64 [[MUL_EXT]]
+; CHECK-NEXT:    [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, ptr [[E:%.*]], i64 [[MUL_EXT]]
 ; CHECK-NEXT:    [[LOADE:%.*]] = load i32, ptr [[ARRAYIDXE]], align 4
 ; CHECK-NEXT:    [[MULC:%.*]] = mul i32 [[LOADD]], [[LOADE]]
-; CHECK-NEXT:    [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[MUL_EXT]]
+; CHECK-NEXT:    [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[MUL_EXT]]
 ; CHECK-NEXT:    store i32 [[MULC]], ptr [[ARRAYIDXC]], align 4
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
-; CHECK:       for.end.loopexit:
-; CHECK-NEXT:    br label [[FOR_END:%.*]]
-; CHECK:       for.end.loopexit2:
-; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
 ;

From 5019174a82ca9f2e6ce6e9260d496eba3bf73be8 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Mon, 23 Sep 2024 10:23:49 +0100
Subject: [PATCH 3/3] LAA: fix typo

---
 llvm/lib/Analysis/LoopAccessAnalysis.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 46fbf6c0e0e71f..a26da1d5fbb772 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1970,7 +1970,7 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
   LLVM_DEBUG(dbgs() << "LAA:  Src induction step: " << *StrideAPtr
                     << " Sink induction step: " << *StrideBPtr << "\n");
 
-  // Note that store size is different from alloc size, which is depedent on
+  // Note that store size is different from alloc size, which is dependent on
   // store size. We use the former for checking illegal cases, and the latter
   // for scaling strides.
   TypeSize AStoreSz = DL.getTypeStoreSizeInBits(ATy),



More information about the llvm-commits mailing list