[llvm] [LAA] Prepare to handle diff type sizes v2 (PR #161238)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 29 10:14:35 PDT 2025


https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/161238

The change was originally landed as 1aded51 ([LAA] Prepare to handle diff type sizes (NFC)), but resulted in regressions, and a subsequent crash when 56a1cbb ([LAA] Fix non-NFC parts 1aded51) was landed. This iteration includes tests from reports, corresponding fixes, and is not NFC. In particular, it fixes the case of loop-guards not being applied before checking isSafeDependenceDistance.

As depend_diff_types shows, there are several places where the HasSameSize check can be relaxed for higher analysis precision. As a first step, return both the source size and the sink size from getDependenceDistanceStrideAndSize, along with a HasSameSize boolean for the moment.

>From 6fe7e69b9376331aac2f025e5353fc1c6aaa8cc2 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Mon, 29 Sep 2025 12:39:13 +0100
Subject: [PATCH 1/3] [LAA] Prepare to handle diff type sizes v2

The change was originally landed as 1aded51 ([LAA] Prepare to handle
diff type sizes (NFC)), but resulted in regressions, and a subsequent
crash when 56a1cbb ([LAA] Fix non-NFC parts 1aded51) was landed. This
iteration includes tests from reports, corresponding fixes, and is not
NFC. In particular, it fixes the case of loop-guards not being applied
before checking isSafeDependenceDistance.

As depend_diff_types shows, there are several places where the
HasSameSize check can be relaxed for higher analysis precision. As a
first step, return both the source size and the sink size from
getDependenceDistanceStrideAndSize, along with a HasSameSize boolean for
the moment.
---
 .../llvm/Analysis/LoopAccessAnalysis.h        | 27 ++++-----
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      | 56 +++++++++++--------
 .../LoopAccessAnalysis/depend_diff_types.ll   | 39 +++++++++++++
 3 files changed, 87 insertions(+), 35 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 52ab38583d5de..49a795b5fd6a7 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -413,29 +413,30 @@ class MemoryDepChecker {
     uint64_t MaxStride;
     std::optional<uint64_t> CommonStride;
 
-    /// TypeByteSize is either the common store size of both accesses, or 0 when
-    /// store sizes mismatch.
-    uint64_t TypeByteSize;
+    /// TypeByteSize is a pair of alloc sizes of the source and sink.
+    std::pair<uint64_t, uint64_t> TypeByteSize;
+
+    // HasSameSize is a boolean indicating whether the store sizes of the source
+    // and sink are equal.
+    // TODO: Remove this.
+    bool HasSameSize;
 
     bool AIsWrite;
     bool BIsWrite;
 
     DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t MaxStride,
                                  std::optional<uint64_t> CommonStride,
-                                 uint64_t TypeByteSize, bool AIsWrite,
-                                 bool BIsWrite)
+                                 std::pair<uint64_t, uint64_t> TypeByteSize,
+                                 bool HasSameSize, bool AIsWrite, bool BIsWrite)
         : Dist(Dist), MaxStride(MaxStride), CommonStride(CommonStride),
-          TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
+          TypeByteSize(TypeByteSize), HasSameSize(HasSameSize),
+          AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
   };
 
   /// Get the dependence distance, strides, type size and whether it is a write
-  /// for the dependence between A and B. Returns a DepType, if we can prove
-  /// there's no dependence or the analysis fails. Outlined to lambda to limit
-  /// he scope of various temporary variables, like A/BPtr, StrideA/BPtr and
-  /// others. Returns either the dependence result, if it could already be
-  /// determined, or a DepDistanceStrideAndSizeInfo struct, noting that
-  /// TypeByteSize could be 0 when store sizes mismatch, and this should be
-  /// checked in the caller.
+  /// for the dependence between A and B. Returns either a DepType, the
+  /// dependence result, if it could already be determined, or a
+  /// DepDistanceStrideAndSizeInfo struct.
   std::variant<Dependence::DepType, DepDistanceStrideAndSizeInfo>
   getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst,
                                      const MemAccessInfo &B,
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 87fae92977cd2..512ae415d1c3b 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2090,14 +2090,12 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
     return MemoryDepChecker::Dependence::Unknown;
   }
 
-  TypeSize AStoreSz = DL.getTypeStoreSize(ATy);
-  TypeSize BStoreSz = DL.getTypeStoreSize(BTy);
-
-  // If store sizes are not the same, set TypeByteSize to zero, so we can check
-  // it in the caller isDependent.
   uint64_t ASz = DL.getTypeAllocSize(ATy);
   uint64_t BSz = DL.getTypeAllocSize(BTy);
-  uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0;
+
+  // Both the source and sink sizes are needed in dependence checks, depending
+  // on the use.
+  std::pair<uint64_t, uint64_t> TypeByteSize(ASz, BSz);
 
   uint64_t StrideAScaled = std::abs(StrideAPtrInt) * ASz;
   uint64_t StrideBScaled = std::abs(StrideBPtrInt) * BSz;
@@ -2119,8 +2117,24 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
     return Dependence::Unknown;
   }
 
+  // When the distance is possibly zero, we're reading/writing the same memory
+  // location: if the store sizes are not equal, fail with an unknown
+  // dependence.
+  TypeSize AStoreSz = DL.getTypeStoreSize(ATy);
+  TypeSize BStoreSz = DL.getTypeStoreSize(BTy);
+  if (AStoreSz != BStoreSz && SE.isKnownNonPositive(Dist) &&
+      SE.isKnownNonNegative(Dist)) {
+    LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence distance with "
+                         "different type sizes\n");
+    return Dependence::Unknown;
+  }
+
+  // TODO: Remove this.
+  bool HasSameSize = AStoreSz == BStoreSz;
+
   return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,
-                                      TypeByteSize, AIsWrite, BIsWrite);
+                                      TypeByteSize, HasSameSize, AIsWrite,
+                                      BIsWrite);
 }
 
 MemoryDepChecker::Dependence::DepType
@@ -2152,9 +2166,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
     return std::get<Dependence::DepType>(Res);
   }
 
-  auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =
-      std::get<DepDistanceStrideAndSizeInfo>(Res);
-  bool HasSameSize = TypeByteSize > 0;
+  auto &[Dist, MaxStride, CommonStride, TypeByteSize, HasSameSize, AIsWrite,
+         BIsWrite] = std::get<DepDistanceStrideAndSizeInfo>(Res);
 
   ScalarEvolution &SE = *PSE.getSE();
   auto &DL = InnermostLoop->getHeader()->getDataLayout();
@@ -2180,7 +2193,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
     // If the distance between accesses and their strides are known constants,
     // check whether the accesses interlace each other.
     if (ConstDist > 0 && CommonStride && CommonStride > 1 && HasSameSize &&
-        areStridedAccessesIndependent(ConstDist, *CommonStride, TypeByteSize)) {
+        areStridedAccessesIndependent(ConstDist, *CommonStride,
+                                      TypeByteSize.first)) {
       LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
       return Dependence::NoDep;
     }
@@ -2194,13 +2208,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   // Negative distances are not plausible dependencies.
   if (SE.isKnownNonPositive(Dist)) {
     if (SE.isKnownNonNegative(Dist)) {
-      if (HasSameSize) {
-        // Write to the same location with the same size.
-        return Dependence::Forward;
-      }
-      LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
-                           "different type sizes\n");
-      return Dependence::Unknown;
+      // Write to the same location with the same size.
+      assert(HasSameSize && "Accesses must have the same size");
+      return Dependence::Forward;
     }
 
     bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
@@ -2218,7 +2228,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
                                               : Dependence::Unknown;
       }
       if (!HasSameSize ||
-          couldPreventStoreLoadForward(ConstDist, TypeByteSize)) {
+          couldPreventStoreLoadForward(ConstDist, TypeByteSize.first)) {
         LLVM_DEBUG(
             dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
         return Dependence::ForwardButPreventsForwarding;
@@ -2284,7 +2294,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   // We know that Dist is positive, but it may not be constant. Use the signed
   // minimum for computations below, as this ensures we compute the closest
   // possible dependence distance.
-  uint64_t MinDistanceNeeded = MaxStride * (MinNumIter - 1) + TypeByteSize;
+  uint64_t MinDistanceNeeded =
+      MaxStride * (MinNumIter - 1) + TypeByteSize.first;
   if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) {
     if (!ConstDist) {
       // For non-constant distances, we checked the lower bound of the
@@ -2312,14 +2323,15 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
 
   bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
   if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist &&
-      couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride))
+      couldPreventStoreLoadForward(MinDistance, TypeByteSize.first,
+                                   *CommonStride))
     return Dependence::BackwardVectorizableButPreventsForwarding;
 
   uint64_t MaxVF = MinDepDistBytes / MaxStride;
   LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
                     << " with max VF = " << MaxVF << '\n');
 
-  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
+  uint64_t MaxVFInBits = MaxVF * TypeByteSize.first * 8;
   if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {
     // For non-constant distances, we checked the lower bound of the dependence
     // distance and the distance may be larger at runtime (and safe for
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
index 023a8c056968f..c367b31f6d445 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
@@ -187,6 +187,45 @@ exit:
   ret void
 }
 
+; In the following test, dependence distance is possibly zero,
+; but this is not equivalent to the condition known-non-positive
+; and known-non-negative.
+
+define void @possibly_zero_dist_diff_typesz(ptr %p) {
+; CHECK-LABEL: 'possibly_zero_dist_diff_typesz'
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Memory dependences are safe
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        Forward:
+; CHECK-NEXT:            %ld.p = load i32, ptr %gep.p.iv.i32, align 1 ->
+; CHECK-NEXT:            store i16 %trunc, ptr %gep.p.iv.i16, align 1
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.p.iv.i32 = getelementptr inbounds nuw i32, ptr %p, i16 %iv
+  %ld.p = load i32, ptr %gep.p.iv.i32, align 1
+  %trunc = trunc i32 %ld.p to i16
+  %gep.p.iv.i16 = getelementptr inbounds nuw i16, ptr %p, i16 %iv
+  store i16 %trunc, ptr %gep.p.iv.i16, align 1
+  %iv.next = add nuw nsw i16 %iv, 1
+  %exit.cond = icmp eq i16 %iv.next, 32
+  br i1 %exit.cond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
 ; In the following test, the sink is loop-invariant.
 
 define void @type_size_equivalence_sink_loopinv(ptr nocapture %vec, i64 %n) {

>From 9462d9f8e6e1e3141df3c3970e838ae797e34cb2 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Mon, 29 Sep 2025 13:53:04 +0100
Subject: [PATCH 2/3] [LAA] Pre-commit crash test

---
 .../unknown-dependence-with-loop-guards.ll    | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 llvm/test/Analysis/LoopAccessAnalysis/unknown-dependence-with-loop-guards.ll

diff --git a/llvm/test/Analysis/LoopAccessAnalysis/unknown-dependence-with-loop-guards.ll b/llvm/test/Analysis/LoopAccessAnalysis/unknown-dependence-with-loop-guards.ll
new file mode 100644
index 0000000000000..6995550811f41
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/unknown-dependence-with-loop-guards.ll
@@ -0,0 +1,26 @@
+; REQUIRES: asserts
+; RUN: not --crash opt -passes='print<access-info>' -disable-output %s
+
+define void @unknown_dep_loopguards(ptr %a, ptr %b, ptr %c) {
+entry:
+  %ld.b = load i32, ptr %b
+  %guard.cond = icmp slt i32 0, %ld.b
+  br i1 %guard.cond, label %exit, label %loop
+
+loop:
+  %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
+  %offset = add i32 %ld.b, %iv
+  %gep.a.offset = getelementptr i32, ptr %a, i32 %offset
+  %gep.a.offset.2 = getelementptr i32, ptr %gep.a.offset, i32 4
+  %ld.a = load [4 x i32], ptr %gep.a.offset.2
+  store [4 x i32] %ld.a, ptr %c
+  %offset.4 = add i32 %offset, 4
+  %gep.a.offset.4 = getelementptr i32, ptr %a, i32 %offset.4
+  store i32 0, ptr %gep.a.offset.4
+  %iv.next = add i32 %iv, 8
+  %exit.cond = icmp eq i32 %iv.next, 16
+  br i1 %exit.cond, label %exit, label %loop
+
+exit:
+  ret void
+}

>From f4c295d2432ba2ce6d91650d4c1bee01b621146d Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Mon, 29 Sep 2025 17:54:02 +0100
Subject: [PATCH 3/3] [LAA] Fix for crash, non-NFC

---
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      |  39 +++--
 .../is-safe-dep-distance-with-loop-guards.ll  |  46 ------
 .../unknown-dependence-with-loop-guards.ll    | 140 +++++++++++++++++-
 .../interleaved-accesses-use-after-free.ll    |  24 ---
 .../reuse-lcssa-phi-scev-expansion.ll         | 102 ++-----------
 5 files changed, 166 insertions(+), 185 deletions(-)

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 512ae415d1c3b..8a5f799793a14 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2106,17 +2106,23 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
   if (StrideAScaled == StrideBScaled)
     CommonStride = StrideAScaled;
 
-  // TODO: Historically, we didn't retry with runtime checks when (unscaled)
-  // strides were different but there is no inherent reason to.
-  if (!isa<SCEVConstant>(Dist))
-    ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt;
-
   // If distance is a SCEVCouldNotCompute, return Unknown immediately.
   if (isa<SCEVCouldNotCompute>(Dist)) {
     LLVM_DEBUG(dbgs() << "LAA: Uncomputable distance.\n");
     return Dependence::Unknown;
   }
 
+  if (!isa<SCEVConstant>(Dist)) {
+    if (!LoopGuards)
+      LoopGuards.emplace(
+          ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));
+    Dist = SE.applyLoopGuards(Dist, *LoopGuards);
+
+    // TODO: Historically, we didn't retry with runtime checks when (unscaled)
+    // strides were different but there is no inherent reason to.
+    ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt;
+  }
+
   // When the distance is possibly zero, we're reading/writing the same memory
   // location: if the store sizes are not equal, fail with an unknown
   // dependence.
@@ -2188,21 +2194,14 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   uint64_t ConstDist =
       match(Dist, m_scev_APInt(APDist)) ? APDist->abs().getZExtValue() : 0;
 
-  // Attempt to prove strided accesses independent.
-  if (APDist) {
-    // If the distance between accesses and their strides are known constants,
-    // check whether the accesses interlace each other.
-    if (ConstDist > 0 && CommonStride && CommonStride > 1 && HasSameSize &&
-        areStridedAccessesIndependent(ConstDist, *CommonStride,
-                                      TypeByteSize.first)) {
-      LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
-      return Dependence::NoDep;
-    }
-  } else {
-    if (!LoopGuards)
-      LoopGuards.emplace(
-          ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));
-    Dist = SE.applyLoopGuards(Dist, *LoopGuards);
+  // Attempt to prove strided accesses independent. If the distance between
+  // accesses and their strides are known constants, check whether the accesses
+  // interlace each other.
+  if (ConstDist && CommonStride && CommonStride > 1 && HasSameSize &&
+      areStridedAccessesIndependent(ConstDist, *CommonStride,
+                                    TypeByteSize.first)) {
+    LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
+    return Dependence::NoDep;
   }
 
   // Negative distances are not plausible dependencies.
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/is-safe-dep-distance-with-loop-guards.ll b/llvm/test/Analysis/LoopAccessAnalysis/is-safe-dep-distance-with-loop-guards.ll
index 9cc0a976c900e..9b23ed6e6eca0 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/is-safe-dep-distance-with-loop-guards.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/is-safe-dep-distance-with-loop-guards.ll
@@ -37,49 +37,3 @@ loop:
 exit:
   ret void
 }
-
-define void @safe_deps_2_due_to_dependence_distance(i16 %n, ptr %p3, i16 noundef %q, ptr %p1, ptr %p2) {
-; CHECK-LABEL: 'safe_deps_2_due_to_dependence_distance'
-; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Memory dependences are safe
-; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Grouped accesses:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT:      SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Expressions re-written:
-;
-entry:
-  %0 = shl i16 %n, 1
-  %alloca = alloca [2 x i32], i16 %0
-  %arrayidx1 = getelementptr inbounds i32, ptr %p1, i16 %q
-  %arrayidx2 = getelementptr inbounds i8, ptr %p3, i16 2
-  %arrayidx4 = getelementptr inbounds i32, ptr %p2, i16 %q
-  %cmp42 = icmp sgt i16 %n, 0
-  br i1 %cmp42, label %ph, label %exit
-
-ph:
-  %arrayidx40 = getelementptr inbounds [2 x i32], ptr %alloca, i16 %n
-  br label %loop
-
-loop:
-  %iv = phi i16 [ 0, %ph ], [ %iv.next, %loop ]
-  %arrayidx6 = getelementptr inbounds i32, ptr %arrayidx1, i16 %iv
-  %arrayidx11 = getelementptr inbounds i32, ptr %arrayidx4, i16 %iv
-  %arrayidx22 = getelementptr inbounds [2 x i32], ptr %alloca, i16 %iv
-  store i32 10, ptr %arrayidx22
-  %arrayidx33 = getelementptr inbounds i8, ptr %arrayidx22, i16 4
-  store i32 16, ptr %arrayidx33
-  %arrayidx42 = getelementptr inbounds [2 x i32], ptr %arrayidx40, i16 %iv
-  store i32 19, ptr %arrayidx42
-  %arrayidx53 = getelementptr inbounds i8, ptr %arrayidx42, i16 4
-  store i32 23, ptr %arrayidx53
-  %iv.next = add nuw nsw i16 %iv, 1
-  %exitcond.not = icmp eq i16 %iv.next, %n
-  br i1 %exitcond.not, label %exit, label %loop
-
-exit:
-  ret void
-}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/unknown-dependence-with-loop-guards.ll b/llvm/test/Analysis/LoopAccessAnalysis/unknown-dependence-with-loop-guards.ll
index 6995550811f41..4d4ecb3b28aeb 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/unknown-dependence-with-loop-guards.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/unknown-dependence-with-loop-guards.ll
@@ -1,7 +1,53 @@
-; REQUIRES: asserts
-; RUN: not --crash opt -passes='print<access-info>' -disable-output %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s
 
-define void @unknown_dep_loopguards(ptr %a, ptr %b, ptr %c) {
+define void @unsafe_dep_loopguards(ptr %a, ptr %b, ptr %c) {
+; CHECK-LABEL: 'unsafe_dep_loopguards'
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unknown data dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        Unknown:
+; CHECK-NEXT:            %ld.a = load [4 x i32], ptr %gep.a.offset.2, align 4 ->
+; CHECK-NEXT:            store i32 0, ptr %gep.a.offset.4, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group GRP0:
+; CHECK-NEXT:        ptr %c
+; CHECK-NEXT:        Against group GRP1:
+; CHECK-NEXT:          %gep.a.offset.2 = getelementptr i32, ptr %gep.a.offset, i32 4
+; CHECK-NEXT:      Check 1:
+; CHECK-NEXT:        Comparing group GRP0:
+; CHECK-NEXT:        ptr %c
+; CHECK-NEXT:        Against group GRP2:
+; CHECK-NEXT:          %gep.a.offset.4 = getelementptr i32, ptr %a, i32 %offset.4
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group GRP0:
+; CHECK-NEXT:          (Low: %c High: (16 + %c))
+; CHECK-NEXT:            Member: %c
+; CHECK-NEXT:        Group GRP1:
+; CHECK-NEXT:          (Low: (16 + (4 * (sext i32 %ld.b to i64))<nsw> + %a) High: (64 + (4 * (sext i32 %ld.b to i64))<nsw> + %a))
+; CHECK-NEXT:            Member: {(16 + (4 * (sext i32 %ld.b to i64))<nsw> + %a),+,32}<%loop>
+; CHECK-NEXT:        Group GRP2:
+; CHECK-NEXT:          (Low: ((4 * (sext i32 (4 + %ld.b) to i64))<nsw> + %a) High: (36 + (4 * (sext i32 (4 + %ld.b) to i64))<nsw> + %a))
+; CHECK-NEXT:            Member: {((4 * (sext i32 (4 + %ld.b) to i64))<nsw> + %a),+,32}<%loop>
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-NEXT:      {(4 + %ld.b),+,8}<nw><%loop> Added Flags: <nssw>
+; CHECK-NEXT:      {((4 * (sext i32 (4 + %ld.b) to i64))<nsw> + %a),+,32}<%loop> Added Flags: <nusw>
+; CHECK-NEXT:      {%ld.b,+,8}<nw><%loop> Added Flags: <nssw>
+; CHECK-NEXT:      {(16 + (4 * (sext i32 %ld.b to i64))<nsw> + %a),+,32}<%loop> Added Flags: <nusw>
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+; CHECK-NEXT:      [PSE] %gep.a.offset.2 = getelementptr i32, ptr %gep.a.offset, i32 4:
+; CHECK-NEXT:        (16 + (4 * (sext i32 {%ld.b,+,8}<nw><%loop> to i64))<nsw> + %a)
+; CHECK-NEXT:        --> {(16 + (4 * (sext i32 %ld.b to i64))<nsw> + %a),+,32}<%loop>
+; CHECK-NEXT:      [PSE] %gep.a.offset.4 = getelementptr i32, ptr %a, i32 %offset.4:
+; CHECK-NEXT:        ((4 * (sext i32 {(4 + %ld.b),+,8}<nw><%loop> to i64))<nsw> + %a)
+; CHECK-NEXT:        --> {((4 * (sext i32 (4 + %ld.b) to i64))<nsw> + %a),+,32}<%loop>
+;
 entry:
   %ld.b = load i32, ptr %b
   %guard.cond = icmp slt i32 0, %ld.b
@@ -24,3 +70,91 @@ loop:
 exit:
   ret void
 }
+
+define void @safe_with_rtchecks_loopguards(i16 %n, ptr %p3, i16 noundef %q, ptr %p1, ptr %p2) {
+; CHECK-LABEL: 'safe_with_rtchecks_loopguards'
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group GRP0:
+; CHECK-NEXT:          %arrayidx22 = getelementptr inbounds [2 x i32], ptr %alloca, i16 %iv
+; CHECK-NEXT:        Against group GRP1:
+; CHECK-NEXT:          %arrayidx33 = getelementptr inbounds i8, ptr %arrayidx22, i16 4
+; CHECK-NEXT:      Check 1:
+; CHECK-NEXT:        Comparing group GRP0:
+; CHECK-NEXT:          %arrayidx22 = getelementptr inbounds [2 x i32], ptr %alloca, i16 %iv
+; CHECK-NEXT:        Against group GRP2:
+; CHECK-NEXT:          %arrayidx42 = getelementptr inbounds [2 x i32], ptr %arrayidx40, i16 %iv
+; CHECK-NEXT:      Check 2:
+; CHECK-NEXT:        Comparing group GRP0:
+; CHECK-NEXT:          %arrayidx22 = getelementptr inbounds [2 x i32], ptr %alloca, i16 %iv
+; CHECK-NEXT:        Against group GRP3:
+; CHECK-NEXT:          %arrayidx53 = getelementptr inbounds i8, ptr %arrayidx42, i16 4
+; CHECK-NEXT:      Check 3:
+; CHECK-NEXT:        Comparing group GRP1:
+; CHECK-NEXT:          %arrayidx33 = getelementptr inbounds i8, ptr %arrayidx22, i16 4
+; CHECK-NEXT:        Against group GRP2:
+; CHECK-NEXT:          %arrayidx42 = getelementptr inbounds [2 x i32], ptr %arrayidx40, i16 %iv
+; CHECK-NEXT:      Check 4:
+; CHECK-NEXT:        Comparing group GRP1:
+; CHECK-NEXT:          %arrayidx33 = getelementptr inbounds i8, ptr %arrayidx22, i16 4
+; CHECK-NEXT:        Against group GRP3:
+; CHECK-NEXT:          %arrayidx53 = getelementptr inbounds i8, ptr %arrayidx42, i16 4
+; CHECK-NEXT:      Check 5:
+; CHECK-NEXT:        Comparing group GRP2:
+; CHECK-NEXT:          %arrayidx42 = getelementptr inbounds [2 x i32], ptr %arrayidx40, i16 %iv
+; CHECK-NEXT:        Against group GRP3:
+; CHECK-NEXT:          %arrayidx53 = getelementptr inbounds i8, ptr %arrayidx42, i16 4
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group GRP0:
+; CHECK-NEXT:          (Low: %alloca High: (4 + (8 * (zext i16 (-1 + %n) to i64))<nuw><nsw> + %alloca))
+; CHECK-NEXT:            Member: {%alloca,+,8}<nuw><%loop>
+; CHECK-NEXT:        Group GRP1:
+; CHECK-NEXT:          (Low: (4 + %alloca) High: (8 + (8 * (zext i16 (-1 + %n) to i64))<nuw><nsw> + %alloca))
+; CHECK-NEXT:            Member: {(4 + %alloca),+,8}<nuw><%loop>
+; CHECK-NEXT:        Group GRP2:
+; CHECK-NEXT:          (Low: ((8 * (sext i16 %n to i64))<nsw> + %alloca) High: (4 + (8 * (zext i16 (-1 + %n) to i64))<nuw><nsw> + (8 * (sext i16 %n to i64))<nsw> + %alloca))
+; CHECK-NEXT:            Member: {((8 * (sext i16 %n to i64))<nsw> + %alloca),+,8}<nw><%loop>
+; CHECK-NEXT:        Group GRP3:
+; CHECK-NEXT:          (Low: (4 + (8 * (sext i16 %n to i64))<nsw> + %alloca) High: (8 + (8 * (zext i16 (-1 + %n) to i64))<nuw><nsw> + (8 * (sext i16 %n to i64))<nsw> + %alloca))
+; CHECK-NEXT:            Member: {(4 + (8 * (sext i16 %n to i64))<nsw> + %alloca),+,8}<nw><%loop>
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %0 = shl i16 %n, 1
+  %alloca = alloca [2 x i32], i16 %0
+  %arrayidx1 = getelementptr inbounds i32, ptr %p1, i16 %q
+  %arrayidx2 = getelementptr inbounds i8, ptr %p3, i16 2
+  %arrayidx4 = getelementptr inbounds i32, ptr %p2, i16 %q
+  %cmp42 = icmp sgt i16 %n, 0
+  br i1 %cmp42, label %ph, label %exit
+
+ph:
+  %arrayidx40 = getelementptr inbounds [2 x i32], ptr %alloca, i16 %n
+  br label %loop
+
+loop:
+  %iv = phi i16 [ 0, %ph ], [ %iv.next, %loop ]
+  %arrayidx6 = getelementptr inbounds i32, ptr %arrayidx1, i16 %iv
+  %arrayidx11 = getelementptr inbounds i32, ptr %arrayidx4, i16 %iv
+  %arrayidx22 = getelementptr inbounds [2 x i32], ptr %alloca, i16 %iv
+  store i32 10, ptr %arrayidx22
+  %arrayidx33 = getelementptr inbounds i8, ptr %arrayidx22, i16 4
+  store i32 16, ptr %arrayidx33
+  %arrayidx42 = getelementptr inbounds [2 x i32], ptr %arrayidx40, i16 %iv
+  store i32 19, ptr %arrayidx42
+  %arrayidx53 = getelementptr inbounds i8, ptr %arrayidx42, i16 4
+  store i32 23, ptr %arrayidx53
+  %iv.next = add nuw nsw i16 %iv, 1
+  %exitcond.not = icmp eq i16 %iv.next, %n
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-use-after-free.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-use-after-free.ll
index d5239d5a4e33d..4417486ac8212 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-use-after-free.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-use-after-free.ll
@@ -1,6 +1,5 @@
 ; REQUIRES: asserts
 ; RUN: opt -passes=loop-vectorize -debug-only=loop-accesses -force-vector-width=4 -disable-output %s 2>&1 | FileCheck %s -check-prefix=LOOP-ACCESS
-; RUN: opt -passes=loop-vectorize -debug-only=vectorutils -force-vector-width=4 -disable-output %s 2>&1 | FileCheck %s
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-redhat-linux-gnu"
 
@@ -25,29 +24,6 @@ target triple = "x86_64-redhat-linux-gnu"
 
 ; LOOP-ACCESS: Too many dependences, stopped recording
 
-; If no dependences are recorded because there are too many, LoopAccessAnalysis
-; just conservatively returns true for any pair of instructions compared (even
-; those belonging to the same store group). This tests make sure that we do not
-; incorrectly release a store group which had no dependences between its
-; members, even if we have no dependences recorded because there are too many. 
-
-; CHECK: LV: Creating an interleave group with:  store ptr null, ptr %phi5, align 8
-; CHECK: LV: Inserted:  store ptr %load12, ptr %getelementptr11, align 8
-; CHECK:     into the interleave group with  store ptr null, ptr %phi5
-; CHECK: LV: Inserted:  store ptr %load7, ptr %getelementptr, align 8
-; CHECK:     into the interleave group with  store ptr null, ptr %phi5
-
-; CHECK: LV: Creating an interleave group with:  store ptr null, ptr %getelementptr13, align 8
-; CHECK: LV: Inserted:  store ptr null, ptr %phi6, align 8
-; CHECK:     into the interleave group with  store ptr null, ptr %getelementptr13
-; CHECK: LV: Invalidated store group due to dependence between   store ptr %load7, ptr %getelementptr, align 8 and   store ptr null, ptr %getelementptr13, align 8
-; CHECK-NOT: LV: Invalidated store group due to dependence between
-
-; Note: The (only) invalidated store group is the one containing A (store ptr %load7, ptr %getelementptr, align 8) which is:
-; Group with instructions:  
-;   store ptr null, ptr %phi5, align 8
-;   store ptr %load7, ptr %getelementptr, align 8
-;   store ptr %load12, ptr %getelementptr11, align 8
 define void @test(ptr %arg, ptr %arg1) local_unnamed_addr #0 {
 bb:
   br label %bb2
diff --git a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll
index 73d5e26ef82a2..66e25152fe536 100644
--- a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll
+++ b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5
 ; RUN: opt -p loop-vectorize -force-vector-width=2 -S %s | FileCheck %s
 
 declare noalias noundef ptr @calloc(i64 noundef, i64 noundef)
@@ -49,21 +49,8 @@ define void @reuse_lcssa_phi_for_add_rec1(ptr %head) {
 ; CHECK-NEXT:    br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK-NEXT:    br i1 [[CMP_N]], [[FOR_END:label %.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[IV_LCSSA]], %[[PH]] ]
-; CHECK-NEXT:    br label %[[LOOP_2:.*]]
-; CHECK:       [[LOOP_2]]:
-; CHECK-NEXT:    [[IV_3:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP_2]] ]
-; CHECK-NEXT:    [[IV_NEXT_3]] = add nsw i64 [[IV_3]], -1
-; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr ptr, ptr [[SRC_2]], i64 [[IV_3]]
-; CHECK-NEXT:    [[L:%.*]] = load ptr, ptr [[GEP_SRC_2]], align 8
-; CHECK-NEXT:    store ptr null, ptr [[L]], align 8
-; CHECK-NEXT:    [[T:%.*]] = trunc nuw i64 [[IV_3]] to i32
-; CHECK-NEXT:    [[EC_2:%.*]] = icmp sgt i32 [[T]], 1
-; CHECK-NEXT:    br i1 [[EC_2]], label %[[LOOP_2]], label %[[FOR_END]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK:       [[FOR_END]]:
-; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop.1
@@ -113,40 +100,13 @@ define void @runtime_checks_ptr_inductions(ptr %dst.1, ptr %dst.2, i1 %c) {
 ; CHECK-NEXT:    [[EC_1:%.*]] = icmp eq i32 [[CALL]], 0
 ; CHECK-NEXT:    br i1 [[EC_1]], label %[[LOOP_2_HEADER_PREHEADER:.*]], label %[[LOOP_1]]
 ; CHECK:       [[LOOP_2_HEADER_PREHEADER]]:
-; CHECK-NEXT:    [[SEL_DST_LCSSA1:%.*]] = phi ptr [ [[SEL_DST]], %[[LOOP_1]] ]
 ; CHECK-NEXT:    [[PTR_IV_1_LCSSA:%.*]] = phi ptr [ [[PTR_IV_1]], %[[LOOP_1]] ]
 ; CHECK-NEXT:    [[SEL_DST_LCSSA:%.*]] = phi ptr [ [[SEL_DST]], %[[LOOP_1]] ]
-; CHECK-NEXT:    [[SEL_DST_LCSSA12:%.*]] = ptrtoint ptr [[SEL_DST_LCSSA1]] to i64
-; CHECK-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
-; CHECK:       [[VECTOR_MEMCHECK]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[PTR_IV_1_LCSSA]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[SEL_DST_LCSSA12]]
-; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 2
-; CHECK-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; CHECK:       [[VECTOR_PH]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR_IV_1_LCSSA]], i64 1022
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[SEL_DST_LCSSA]], i64 1022
-; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
-; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_IV_1_LCSSA]], i64 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[SEL_DST_LCSSA]], i64 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP4]], align 1
-; CHECK-NEXT:    store <2 x i8> [[WIDE_LOAD]], ptr [[NEXT_GEP]], align 1
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1022
-; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br label %[[SCALAR_PH]]
-; CHECK:       [[SCALAR_PH]]:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1023, %[[MIDDLE_BLOCK]] ], [ 1, %[[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[PTR_IV_1_LCSSA]], %[[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[SEL_DST_LCSSA]], %[[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT:    br label %[[LOOP_2_HEADER:.*]]
 ; CHECK:       [[LOOP_2_HEADER]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[DEC7:%.*]], %[[LOOP_2_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
-; CHECK-NEXT:    [[PTR_IV_2:%.*]] = phi ptr [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ]
-; CHECK-NEXT:    [[PTR_IV_3:%.*]] = phi ptr [ [[PTR_IV_3_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[DEC7:%.*]], %[[LOOP_2_LATCH:.*]] ], [ 1, %[[LOOP_2_HEADER_PREHEADER]] ]
+; CHECK-NEXT:    [[PTR_IV_2:%.*]] = phi ptr [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[PTR_IV_1_LCSSA]], %[[LOOP_2_HEADER_PREHEADER]] ]
+; CHECK-NEXT:    [[PTR_IV_3:%.*]] = phi ptr [ [[PTR_IV_3_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[SEL_DST_LCSSA]], %[[LOOP_2_HEADER_PREHEADER]] ]
 ; CHECK-NEXT:    [[EC_2:%.*]] = icmp eq i32 [[IV]], 1024
 ; CHECK-NEXT:    br i1 [[EC_2]], label %[[EXIT:.*]], label %[[LOOP_2_LATCH]]
 ; CHECK:       [[LOOP_2_LATCH]]:
@@ -155,7 +115,7 @@ define void @runtime_checks_ptr_inductions(ptr %dst.1, ptr %dst.2, i1 %c) {
 ; CHECK-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV_3]], align 1
 ; CHECK-NEXT:    [[PTR_IV_2_NEXT]] = getelementptr i8, ptr [[PTR_IV_2]], i64 1
 ; CHECK-NEXT:    store i8 [[L]], ptr [[PTR_IV_2]], align 1
-; CHECK-NEXT:    br label %[[LOOP_2_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    br label %[[LOOP_2_HEADER]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -241,22 +201,11 @@ define void @expand_diff_scev_unknown(ptr %dst, i1 %invar.c, i32 %step) mustprog
 ; CHECK-NEXT:    store <2 x i32> zeroinitializer, ptr [[TMP11]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP15]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-NEXT:    br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ [[IV_1_LCSSA]], %[[LOOP_2_PREHEADER]] ], [ [[IV_1_LCSSA]], %[[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT:    br label %[[LOOP_2:.*]]
-; CHECK:       [[LOOP_2]]:
-; CHECK-NEXT:    [[IV_2:%.*]] = phi i32 [ [[IV_2_NEXT:%.*]], %[[LOOP_2]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
-; CHECK-NEXT:    [[IV_2_NEXT]] = add nsw i32 [[IV_2]], [[STEP]]
-; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV_2]]
-; CHECK-NEXT:    store i32 0, ptr [[GEP_DST]], align 4
-; CHECK-NEXT:    [[EC_2:%.*]] = icmp slt i32 [[IV_2_NEXT]], 0
-; CHECK-NEXT:    br i1 [[EC_2]], label %[[LOOP_2]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
-; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop.1
@@ -327,26 +276,10 @@ define void @expand_diff_neg_ptrtoint_expr(ptr %src, ptr %start) {
 ; CHECK-NEXT:    store <2 x i64> [[WIDE_LOAD]], ptr [[NEXT_GEP]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], -2
-; CHECK-NEXT:    br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 1, %[[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[TMP1]], %[[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT:    br label %[[LOOP_3:.*]]
-; CHECK:       [[LOOP_3]]:
-; CHECK-NEXT:    [[IV_2:%.*]] = phi i64 [ [[IV_NEXT_2:%.*]], %[[LOOP_3]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
-; CHECK-NEXT:    [[PTR_IV_3:%.*]] = phi ptr [ [[PTR_IV_3_NEXT:%.*]], %[[LOOP_3]] ], [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ]
-; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[IV_2]], -1
-; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP12]]
-; CHECK-NEXT:    [[L:%.*]] = load i64, ptr [[GEP_SRC]], align 8
-; CHECK-NEXT:    [[PTR_IV_3_NEXT]] = getelementptr i8, ptr [[PTR_IV_3]], i64 8
-; CHECK-NEXT:    store i64 [[L]], ptr [[PTR_IV_3]], align 8
-; CHECK-NEXT:    [[IV_NEXT_2]] = add i64 [[IV_2]], 1
-; CHECK-NEXT:    [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_2]], 0
-; CHECK-NEXT:    br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP_3]], !llvm.loop [[LOOP9:![0-9]+]]
-; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop.1
@@ -421,25 +354,10 @@ define void @scev_exp_reuse_const_add(ptr %dst, ptr %src) {
 ; CHECK-NEXT:    store <2 x i16> [[WIDE_LOAD]], ptr [[NEXT_GEP]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 40
-; CHECK-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 40, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[PTR_IV_1_NEXT_LCSSA]], %[[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT:    br label %[[LOOP_2:.*]]
-; CHECK:       [[LOOP_2]]:
-; CHECK-NEXT:    [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP_2]] ]
-; CHECK-NEXT:    [[PTR_IV_2:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP_2]] ]
-; CHECK-NEXT:    [[IV_2_NEXT]] = add i64 [[IV_1]], 1
-; CHECK-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV_2_NEXT]]
-; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[GEP_SRC_1]], align 2
-; CHECK-NEXT:    [[PTR_IV_2_NEXT]] = getelementptr i8, ptr [[PTR_IV_2]], i64 2
-; CHECK-NEXT:    store i16 [[L]], ptr [[PTR_IV_2]], align 2
-; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_1]], 40
-; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_2]], !llvm.loop [[LOOP11:![0-9]+]]
-; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop.1



More information about the llvm-commits mailing list