[llvm-branch-commits] [llvm] [LAA] Support more cases with non-constant strided accesses (PR #187406)

Andrei Elovikov via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Mar 18 16:58:43 PDT 2026


https://github.com/eas created https://github.com/llvm/llvm-project/pull/187406

Namely, allow vectorization of

```c++
  for (int i = 0; i < N; i += stride)
    a[i] = a[offset + i];
```

...provided `stride` is statically known to be positive, even though it is non-constant.

>From ae04200ad6226f9ed881a4aa4422f1b02a76a279 Mon Sep 17 00:00:00 2001
From: Andrei Elovikov <andrei.elovikov at sifive.com>
Date: Wed, 18 Mar 2026 12:01:55 -0700
Subject: [PATCH 1/2] Add a test

---
 .../non-constant-strides.ll                   | 526 ++++++++++++++++++
 1 file changed, 526 insertions(+)
 create mode 100644 llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides.ll

diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides.ll
new file mode 100644
index 0000000000000..ee7aa504d3a84
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides.ll
@@ -0,0 +1,526 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes='print<access-info>' -disable-output < %s -enable-mem-access-versioning=false 2>&1 | FileCheck %s
+
+define void @known_safe(ptr %p, i8 %a) {
+; CHECK-LABEL: 'known_safe'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            %ld = load i64, ptr %gep.ld, align 4 ->
+; CHECK-NEXT:            store i64 %add, ptr %gep.st, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %a.zext = zext i8 %a to i64
+  %stride = add i64 %a.zext, 1
+  %offset = mul i64 %stride, 128
+  %p.out = getelementptr i64, ptr %p, i64 %offset
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = mul nsw nuw i64 %iv, %stride
+
+  %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+  %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+  %ld = load i64, ptr %gep.ld
+  %add = add i64 %ld, 1
+  store i64 %add, ptr %gep.st
+
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+define void @known_safe_byte_geps(ptr %p, i8 %a) {
+; CHECK-LABEL: 'known_safe_byte_geps'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            %ld = load i64, ptr %gep.ld, align 4 ->
+; CHECK-NEXT:            store i64 %add, ptr %gep.st, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %a.zext = zext i8 %a to i64
+  %stride.elts = add i64 %a.zext, 1
+  %stride = mul i64 %stride.elts, 8
+  %offset = mul i64 %stride, 128
+  %p.out = getelementptr i8, ptr %p, i64 %offset
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = mul nsw nuw i64 %iv, %stride
+
+  %gep.ld = getelementptr inbounds i8, ptr %p, i64 %idx
+  %gep.st = getelementptr inbounds i8, ptr %p.out, i64 %idx
+  %ld = load i64, ptr %gep.ld
+  %add = add i64 %ld, 1
+  store i64 %add, ptr %gep.st
+
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+; `%a u> 0` would be enough to prove safety.
+define void @safe_if_non_zero(ptr %p, i8 %a) {
+; CHECK-LABEL: 'safe_if_non_zero'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            %ld = load i64, ptr %gep.ld, align 4 ->
+; CHECK-NEXT:            store i64 %add, ptr %gep.st, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %stride = zext i8 %a to i64
+  %offset = mul i64 %stride, 128
+  %p.out = getelementptr i64, ptr %p, i64 %offset
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = mul nsw nuw i64 %iv, %stride
+
+  %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+  %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+  %ld = load i64, ptr %gep.ld
+  %add = add i64 %ld, 1
+  store i64 %add, ptr %gep.st
+
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+; `%a u> 0` would be enough to prove safety.
+define void @safe_if_non_zero_byte_gep(ptr %p, i8 %a) {
+; CHECK-LABEL: 'safe_if_non_zero_byte_gep'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            %ld = load i64, ptr %gep.ld, align 4 ->
+; CHECK-NEXT:            store i64 %add, ptr %gep.st, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %stride.elts = zext i8 %a to i64
+  %stride = mul i64 %stride.elts, 8
+  %offset = mul i64 %stride, 128
+  %p.out = getelementptr i8, ptr %p, i64 %offset
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = mul nsw nuw i64 %iv, %stride
+
+  %gep.ld = getelementptr inbounds i8, ptr %p, i64 %idx
+  %gep.st = getelementptr inbounds i8, ptr %p.out, i64 %idx
+  %ld = load i64, ptr %gep.ld
+  %add = add i64 %ld, 1
+  store i64 %add, ptr %gep.st
+
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+; `%a u>= 8` is necessary to prove safety.
+define void @known_non_negative_byte_gep(ptr %p, i8 %a) {
+; CHECK-LABEL: 'known_non_negative_byte_gep'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            %ld = load i64, ptr %gep.ld, align 4 ->
+; CHECK-NEXT:            store i64 %add, ptr %gep.st, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %stride = zext i8 %a to i64
+  %offset = mul i64 %stride, 128
+  %p.out = getelementptr i8, ptr %p, i64 %offset
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = mul nsw nuw i64 %iv, %stride
+
+  %gep.ld = getelementptr inbounds i8, ptr %p, i64 %idx
+  %gep.st = getelementptr inbounds i8, ptr %p.out, i64 %idx
+  %ld = load i64, ptr %gep.ld
+  %add = add i64 %ld, 1
+  store i64 %add, ptr %gep.st
+
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+; Stride is safe (i.e., `%stride u>= type-size` is known statically), only the
+; `%offset` needs to be checked to ensure in/out pointers have enough distance
+; between them.
+define void @offset_dep_check_sufficient(ptr %p, i8 %a, i64 %offset) {
+; CHECK-LABEL: 'offset_dep_check_sufficient'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            %ld = load i64, ptr %gep.ld, align 4 ->
+; CHECK-NEXT:            store i64 %add, ptr %gep.st, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %a.zext = zext i8 %a to i64
+  %stride = add i64 %a.zext, 1
+  %p.out = getelementptr i64, ptr %p, i64 %offset
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = mul nsw nuw i64 %iv, %stride
+
+  %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+  %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+  %ld = load i64, ptr %gep.ld
+  %add = add i64 %ld, 1
+  store i64 %add, ptr %gep.st
+
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+; Same as `@offset_dep_check_sufficient` but both input/output pointers are arguments instead of
+; common base plus offset.
+define void @distance_dep_check_sufficient(ptr %p, ptr %p.out, i8 %a) {
+; CHECK-LABEL: 'distance_dep_check_sufficient'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group GRP0:
+; CHECK-NEXT:          %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+; CHECK-NEXT:        Against group GRP1:
+; CHECK-NEXT:          %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group GRP0:
+; CHECK-NEXT:          (Low: ((1016 + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p.out) umin %p.out) High: (8 + ((1016 + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p.out) umax %p.out)))
+; CHECK-NEXT:            Member: {%p.out,+,(8 + (8 * (zext i8 %a to i64))<nuw><nsw>)<nuw><nsw>}<nuw><%header>
+; CHECK-NEXT:        Group GRP1:
+; CHECK-NEXT:          (Low: ((1016 + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umin %p) High: (8 + ((1016 + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umax %p)))
+; CHECK-NEXT:            Member: {%p,+,(8 + (8 * (zext i8 %a to i64))<nuw><nsw>)<nuw><nsw>}<nuw><%header>
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %a.zext = zext i8 %a to i64
+  %stride = add i64 %a.zext, 1
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = mul nsw nuw i64 %iv, %stride
+
+  %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+  %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+  %ld = load i64, ptr %gep.ld
+  %add = add i64 %ld, 1
+  store i64 %add, ptr %gep.st
+
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+define void @needs_non_zero_stride_and_distance_checks(ptr %p, i8 %a, i64 %offset) {
+; CHECK-LABEL: 'needs_non_zero_stride_and_distance_checks'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            %ld = load i64, ptr %gep.ld, align 4 ->
+; CHECK-NEXT:            store i64 %add, ptr %gep.st, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %stride = zext i8 %a to i64
+  %p.out = getelementptr i64, ptr %p, i64 %offset
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = mul nsw nuw i64 %iv, %stride
+
+  %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+  %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+  %ld = load i64, ptr %gep.ld
+  %add = add i64 %ld, 1
+  store i64 %add, ptr %gep.st
+
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+; Both distance (offset) and the stride (non-zero, no-overflow) need to be
+; checked at run time.
+define void @needs_all(ptr %p, i64 %stride, i64 %offset) {
+; CHECK-LABEL: 'needs_all'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        IndirectUnsafe:
+; CHECK-NEXT:            %ld = load i64, ptr %gep.ld, align 4 ->
+; CHECK-NEXT:            store i64 %add, ptr %gep.st, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  %p.out = getelementptr i64, ptr %p, i64 %offset
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = mul nsw nuw i64 %iv, %stride
+
+  %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+  %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+  %ld = load i64, ptr %gep.ld
+  %add = add i64 %ld, 1
+  store i64 %add, ptr %gep.st
+
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+; Same as `@needs_all` but both input/output pointers are arguments instead of
+; common base plus offset.
+define void @needs_all_distinct_ptrs(ptr %p, ptr %p.out, i64 %stride) {
+; CHECK-LABEL: 'needs_all_distinct_ptrs'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group GRP0:
+; CHECK-NEXT:          %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+; CHECK-NEXT:        Against group GRP1:
+; CHECK-NEXT:          %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group GRP0:
+; CHECK-NEXT:          (Low: (((1016 * %stride) + %p.out) umin %p.out) High: (8 + (((1016 * %stride) + %p.out) umax %p.out)))
+; CHECK-NEXT:            Member: {%p.out,+,(8 * %stride)}<%header>
+; CHECK-NEXT:        Group GRP1:
+; CHECK-NEXT:          (Low: (((1016 * %stride) + %p) umin %p) High: (8 + (((1016 * %stride) + %p) umax %p)))
+; CHECK-NEXT:            Member: {%p,+,(8 * %stride)}<%header>
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = mul nsw nuw i64 %iv, %stride
+
+  %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+  %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+  %ld = load i64, ptr %gep.ld
+  %add = add i64 %ld, 1
+  store i64 %add, ptr %gep.st
+
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+; Safe to vectorize, only last store is visible outside the loop and `scatter`
+; is ordered even if `%stride == 0` in run-time. However, might be better to be
+; conservative here to avoid miscompiling the next test.
+define void @waw_no_mask(ptr %p, i64 %stride) {
+; CHECK-LABEL: 'waw_no_mask'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Memory dependences are safe
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = mul nsw nuw i64 %iv, %stride
+
+  %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+  store i64 %iv, ptr %gep
+  store i64 %iv.next, ptr %gep
+
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}
+
+; Unsafe to vectorize without `%stride != 0` RT check.
+; FIXME: https://github.com/llvm/llvm-project/issues/187402
+define void @waw_mask(ptr %p, i64 %stride, i64 %n0, i64 %n1) {
+; CHECK-LABEL: 'waw_mask'
+; CHECK-NEXT:    header:
+; CHECK-NEXT:      Memory dependences are safe
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  br label %header
+
+header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+  %iv.next = add nsw i64 %iv, 1
+  %idx = mul nsw nuw i64 %iv, %stride
+
+  %c0 = icmp sle i64 %iv, %n0
+  %c1 = icmp sle i64 %iv, %n1
+
+  %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+  br i1 %c0, label %store0, label %merge
+
+store0:
+  store i64 %iv, ptr %gep
+  br label %merge
+
+merge:
+  br i1 %c1, label %store1, label %latch
+
+store1:
+  store i64 %iv.next, ptr %gep
+  br label %latch
+
+latch:
+  %exitcond = icmp slt i64 %iv.next, 128
+  br i1 %exitcond, label %header, label %exit
+
+exit:
+  ret void
+}

>From f21d290f84db317405de5e52acb4a1b187904ee5 Mon Sep 17 00:00:00 2001
From: Andrei Elovikov <andrei.elovikov at sifive.com>
Date: Wed, 18 Mar 2026 12:44:15 -0700
Subject: [PATCH 2/2] [LAA] Support more cases with non-constant strided
 accesses

Namely, allow vectorization of

```c++
  for (int i = 0; i < N; i += stride)
    a[i] = a[offset + i];
```

...provided `stride` is statically known to be positive, even though it is non-constant.
---
 .../llvm/Analysis/LoopAccessAnalysis.h        |  15 +-
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      | 181 ++++++++++++------
 .../dependences-i128-inductions.ll            |   7 +-
 .../non-constant-strides.ll                   |  86 +++++----
 4 files changed, 181 insertions(+), 108 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index ac0b454d33737..92213f93db8c7 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -410,8 +410,9 @@ class MemoryDepChecker {
 
     /// Strides here are scaled; i.e. in bytes, taking the size of the
     /// underlying type into account.
-    uint64_t MaxStride;
-    std::optional<uint64_t> CommonStride;
+    const SCEV *MaxAbsStrideInBytes;
+    /// If `nullptr` then strides (might) differ.
+    const SCEV *CommonStrideInBytes;
 
     /// TypeByteSize is either the common store size of both accesses, or 0 when
     /// store sizes mismatch.
@@ -420,12 +421,14 @@ class MemoryDepChecker {
     bool AIsWrite;
     bool BIsWrite;
 
-    DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t MaxStride,
-                                 std::optional<uint64_t> CommonStride,
+    DepDistanceStrideAndSizeInfo(const SCEV *Dist,
+                                 const SCEV *MaxAbsStrideInBytes,
+                                 const SCEV *CommonStride,
                                  uint64_t TypeByteSize, bool AIsWrite,
                                  bool BIsWrite)
-        : Dist(Dist), MaxStride(MaxStride), CommonStride(CommonStride),
-          TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
+        : Dist(Dist), MaxAbsStrideInBytes(MaxAbsStrideInBytes),
+          CommonStrideInBytes(CommonStride), TypeByteSize(TypeByteSize),
+          AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
   };
 
   /// Get the dependence distance, strides, type size and whether it is a write
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index f4fb45bfeae5f..c3aaa020745db 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1979,7 +1979,7 @@ void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
 ///     }
 static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
                                      const SCEV &MaxBTC, const SCEV &Dist,
-                                     uint64_t MaxStride) {
+                                     const SCEV &MaxStride) {
 
   // If we can prove that
   //      (**) |Dist| > MaxBTC * Step
@@ -1998,13 +1998,15 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
   // will be executed only if LoopCount >= VF, proving distance >= LoopCount
   // also guarantees that distance >= VF.
   //
-  const SCEV *Step = SE.getConstant(MaxBTC.getType(), MaxStride);
-  const SCEV *Product = SE.getMulExpr(&MaxBTC, Step);
+  Type *ProductTy = SE.getWiderType(MaxBTC.getType(), MaxStride.getType());
+  const SCEV *Product =
+      SE.getMulExpr(SE.getNoopOrZeroExtend(&MaxBTC, ProductTy),
+                    SE.getTruncateOrZeroExtend(&MaxStride, ProductTy));
 
   const SCEV *CastedDist = &Dist;
   const SCEV *CastedProduct = Product;
   uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
-  uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
+  uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(ProductTy);
 
   // The dependence distance can be positive/negative, so we sign extend Dist;
   // The multiplication of the absolute stride in bytes and the
@@ -2012,7 +2014,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
   if (DistTypeSizeBits > ProductTypeSizeBits)
     CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
   else
-    CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
+    CastedDist = SE.getNoopOrSignExtend(&Dist, ProductTy);
 
   // Is  Dist - (MaxBTC * Step) > 0 ?
   // (If so, then we have proven (**) because |Dist| >= Dist)
@@ -2027,19 +2029,28 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
   return SE.isKnownPositive(Minus);
 }
 
-/// Check the dependence for two accesses with the same stride \p Stride.
-/// \p Distance is the positive distance in bytes, and \p TypeByteSize is type
-/// size in bytes.
+/// Check the dependence for two accesses in the same direction with the same
+/// absolute stride \p Stride. \p Distance is the positive distance in bytes,
+/// and \p TypeByteSize is type size in bytes.
 ///
 /// \returns true if they are independent.
-static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
-                                          uint64_t TypeByteSize) {
-  assert(Stride > 1 && "The stride must be greater than 1");
-  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
-  assert(Distance > 0 && "The distance must be non-zero");
-
-  // Skip if the distance is not multiple of type byte size.
-  if (Distance % TypeByteSize)
+static bool areStridedAccessesIndependent(ScalarEvolution &SE,
+                                          const SCEV *Distance,
+                                          const SCEV *Stride,
+                                          const SCEV *TypeByteSize) {
+  assert(Stride && "Must be strided!");
+  assert(SE.isKnownNonNegative(Stride) && "Stride must be absolute!");
+  assert(SE.isKnownPositive(Distance) && "The distance must be non-zero");
+  assert(SE.isKnownPositive(TypeByteSize) &&
+         "The type size in byte must be non-zero");
+
+  // Check if guaranteed `Stride > 1`.
+  if (!SE.isKnownPositive(
+          SE.getAddExpr(Stride, SE.getMinusOne(Stride->getType()))))
+    return false;
+
+  // Skip if the distance is not guaranteed to be a multiple of type byte size.
+  if (!SE.getURemExpr(Distance, TypeByteSize)->isZero())
     return false;
 
   // No dependence if the distance is not multiple of the stride.
@@ -2058,7 +2069,11 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
   // Two accesses in memory (distance is 4, stride is 3):
   //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
   //     |      |      |      |      | A[4] |      |      | A[7] |      |
-  return Distance % Stride;
+  Type *CommonTy = SE.getWiderType(Distance->getType(), Stride->getType());
+  Distance = SE.getNoopOrZeroExtend(Distance, CommonTy);
+  Stride = SE.getNoopOrZeroExtend(Stride, CommonTy);
+  auto *KnownRem = dyn_cast<SCEVConstant>(SE.getURemExpr(Distance, Stride));
+  return KnownRem && !KnownRem->isZero();
 }
 
 bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
@@ -2116,10 +2131,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
       BPtr->getType()->getPointerAddressSpace())
     return MemoryDepChecker::Dependence::Unknown;
 
-  std::optional<int64_t> StrideAPtr = getPtrStride(
-      PSE, ATy, APtr, InnermostLoop, *DT, SymbolicStrides, true, true);
-  std::optional<int64_t> StrideBPtr = getPtrStride(
-      PSE, BTy, BPtr, InnermostLoop, *DT, SymbolicStrides, true, true);
+  const SCEV *StrideAPtr = getPtrStrideScev(PSE, ATy, APtr, InnermostLoop, *DT,
+                                            SymbolicStrides, true, true);
+  const SCEV *StrideBPtr = getPtrStrideScev(PSE, BTy, BPtr, InnermostLoop, *DT,
+                                            SymbolicStrides, true, true);
 
   const SCEV *Src = PSE.getSCEV(APtr);
   const SCEV *Sink = PSE.getSCEV(BPtr);
@@ -2127,7 +2142,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
   // If the induction step is negative we have to invert source and sink of the
   // dependence when measuring the distance between them. We should not swap
   // AIsWrite with BIsWrite, as their uses expect them in program order.
-  if (StrideAPtr && *StrideAPtr < 0) {
+
+  assert((!StrideAPtr || SE.isKnownNonPositive(StrideAPtr) ||
+          SE.isKnownNonNegative(StrideAPtr)) &&
+         "Did getPtrStrideScev's guarantees change?");
+
+  // TODO: Does that work ok for run-time zero not known in compile-time?
+  if (StrideAPtr && SE.isKnownNonPositive(StrideAPtr) &&
+      !StrideAPtr->isZero()) {
     std::swap(Src, Sink);
     std::swap(AInst, BInst);
     std::swap(ATy, BTy);
@@ -2141,32 +2163,32 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
   LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
                     << ": " << *Dist << "\n");
 
-  // Need accesses with constant strides and the same direction for further
-  // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
-  // similar code or pointer arithmetic that could wrap in the address space.
+  // Need accesses with loop-invariant strides and the same direction for
+  // further dependence analysis. We don't want to vectorize "A[B[i]] += ..."
+  // and similar code or pointer arithmetic that could wrap in the address
+  // space.
 
   // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and
   // not loop-invariant (stride will be 0 in that case), we cannot analyze the
   // dependence further and also cannot generate runtime checks.
   if (!StrideAPtr || !StrideBPtr) {
-    LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
+    LLVM_DEBUG(dbgs() << "Non-strided pointer access\n");
     return MemoryDepChecker::Dependence::IndirectUnsafe;
   }
 
-  int64_t StrideAPtrInt = *StrideAPtr;
-  int64_t StrideBPtrInt = *StrideBPtr;
-  LLVM_DEBUG(dbgs() << "LAA:  Src induction step: " << StrideAPtrInt
-                    << " Sink induction step: " << StrideBPtrInt << "\n");
+  LLVM_DEBUG(dbgs() << "LAA:  Src induction step: " << *StrideAPtr
+                    << " Sink induction step: " << *StrideBPtr << "\n");
   // At least Src or Sink are loop invariant and the other is strided or
   // invariant. We can generate a runtime check to disambiguate the accesses.
-  if (!StrideAPtrInt || !StrideBPtrInt)
+  if (StrideAPtr->isZero() || StrideBPtr->isZero())
     return MemoryDepChecker::Dependence::Unknown;
 
   // Both Src and Sink have a constant stride, check if they are in the same
   // direction.
-  if ((StrideAPtrInt > 0) != (StrideBPtrInt > 0)) {
+  if (!SE.haveSameSign(StrideAPtr, StrideBPtr)) {
     LLVM_DEBUG(
-        dbgs() << "Pointer access with strides in different directions\n");
+        dbgs()
+        << "Pointer access with strides in potentially different directions\n");
     return MemoryDepChecker::Dependence::Unknown;
   }
 
@@ -2179,19 +2201,26 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
   uint64_t BSz = DL.getTypeAllocSize(BTy);
   uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0;
 
-  uint64_t StrideAScaled = std::abs(StrideAPtrInt) * ASz;
-  uint64_t StrideBScaled = std::abs(StrideBPtrInt) * BSz;
+  const SCEV *StrideAScaled =
+      SE.getMulExpr(SE.getAbsExpr(StrideAPtr, false),
+                    SE.getConstant(StrideAPtr->getType(), ASz));
+  const SCEV *StrideBScaled =
+      SE.getMulExpr(SE.getAbsExpr(StrideBPtr, false),
+                    SE.getConstant(StrideBPtr->getType(), ASz));
 
-  uint64_t MaxStride = std::max(StrideAScaled, StrideBScaled);
+  const SCEV *MaxAbsStrideInBytes =
+      SE.getUMaxExpr(StrideAScaled, StrideBScaled);
 
-  std::optional<uint64_t> CommonStride;
+  Type *I64Ty = Type::getInt64Ty(ATy->getContext());
+
+  const SCEV *CommonStride = nullptr;
   if (StrideAScaled == StrideBScaled)
     CommonStride = StrideAScaled;
 
   // TODO: Historically, we didn't retry with runtime checks when (unscaled)
   // strides were different but there is no inherent reason to.
   if (!isa<SCEVConstant>(Dist))
-    ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt;
+    ShouldRetryWithRuntimeChecks |= StrideAPtr == StrideBPtr;
 
   // If distance is a SCEVCouldNotCompute, return Unknown immediately.
   if (isa<SCEVCouldNotCompute>(Dist)) {
@@ -2199,7 +2228,7 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
     return Dependence::Unknown;
   }
 
-  return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,
+  return DepDistanceStrideAndSizeInfo(Dist, MaxAbsStrideInBytes, CommonStride,
                                       TypeByteSize, AIsWrite, BIsWrite);
 }
 
@@ -2232,9 +2261,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
     return std::get<Dependence::DepType>(Res);
   }
 
-  auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =
-      std::get<DepDistanceStrideAndSizeInfo>(Res);
-  bool HasSameSize = TypeByteSize > 0;
+  auto &[Dist, MaxStride, CommonStride, CommonTypeSizeInBytes, AIsWrite,
+         BIsWrite] = std::get<DepDistanceStrideAndSizeInfo>(Res);
 
   ScalarEvolution &SE = *PSE.getSE();
   auto &DL = InnermostLoop->getHeader()->getDataLayout();
@@ -2244,23 +2272,35 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   // upper bound of the number of iterations), the accesses are independet, i.e.
   // they are far enough appart that accesses won't access the same location
   // across all loop ierations.
-  if (HasSameSize &&
+  if (CommonTypeSizeInBytes &&
       isSafeDependenceDistance(
-          DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()), *Dist, MaxStride))
+          DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()), *Dist, *MaxStride))
     return Dependence::NoDep;
 
-  // The rest of this function relies on ConstDist being at most 64-bits, which
-  // is checked earlier. Will assert if the calling code changes.
+  // The rest of this function relies on ConstDist being at most 64-bits.
+
   const APInt *APDist = nullptr;
-  uint64_t ConstDist =
-      match(Dist, m_scev_APInt(APDist)) ? APDist->abs().getZExtValue() : 0;
+  uint64_t ConstDist = 0;
+
+  if (match(Dist, m_scev_APInt(APDist))) {
+    std::optional<uint64_t> MaybeZExt = APDist->abs().tryZExtValue();
+    if (!MaybeZExt) {
+      LLVM_DEBUG(dbgs() << "LAA: Distance is too huge.\n");
+      return Dependence::IndirectUnsafe;
+    }
+    ConstDist = *MaybeZExt;
+  }
+
+  Type *I64Ty = Type::getInt64Ty(SE.getContext());
 
   // Attempt to prove strided accesses independent.
   if (APDist) {
     // If the distance between accesses and their strides are known constants,
     // check whether the accesses interlace each other.
-    if (ConstDist > 0 && CommonStride && CommonStride > 1 && HasSameSize &&
-        areStridedAccessesIndependent(ConstDist, *CommonStride, TypeByteSize)) {
+    if (ConstDist > 0 && CommonStride && CommonTypeSizeInBytes &&
+        areStridedAccessesIndependent(
+            SE, SE.getConstant(I64Ty, ConstDist), CommonStride,
+            SE.getConstant(I64Ty, CommonTypeSizeInBytes))) {
       LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
       return Dependence::NoDep;
     }
@@ -2274,9 +2314,13 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   // Negative distances are not plausible dependencies.
   if (SE.isKnownNonPositive(Dist)) {
     if (SE.isKnownNonNegative(Dist)) {
-      if (HasSameSize) {
-        // Write to the same location with the same size.
-        return Dependence::Forward;
+      if (CommonTypeSizeInBytes) {
+        if (SE.isKnownNonZero(CommonStride))
+          // Write to the same location with the same size.
+          return Dependence::Forward;
+        else
+          // Needs a RT check on the stride, not implemented yet.
+          return Dependence::IndirectUnsafe;
       }
       LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
                            "different type sizes\n");
@@ -2297,8 +2341,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
         return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
                                               : Dependence::Unknown;
       }
-      if (!HasSameSize ||
-          couldPreventStoreLoadForward(ConstDist, TypeByteSize)) {
+      if (!CommonTypeSizeInBytes ||
+          couldPreventStoreLoadForward(ConstDist, CommonTypeSizeInBytes)) {
         LLVM_DEBUG(
             dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
         return Dependence::ForwardButPreventsForwarding;
@@ -2316,7 +2360,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
                                           : Dependence::Unknown;
   }
 
-  if (!HasSameSize) {
+  if (!CommonTypeSizeInBytes) {
     if (CheckCompletelyBeforeOrAfter())
       return Dependence::NoDep;
     LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
@@ -2331,6 +2375,19 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   // The minimum number of iterations for a vectorized/unrolled version.
   unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
 
+  if (!isa<SCEVConstant>(MaxStride)) {
+    LLVM_DEBUG(dbgs() << "LAA: Cannot analyze non-constant stride further\n");
+    return Dependence::Unknown;
+  }
+
+  std::optional<int64_t> MaybeMaxStrideVal =
+      cast<SCEVConstant>(MaxStride)->getAPInt().trySExtValue();
+  if (!MaybeMaxStrideVal) {
+    LLVM_DEBUG(dbgs() << "LAA: Cannot analyze huge constant stride further\n");
+    return Dependence::Unknown;
+  }
+  int64_t MaxStrideVal = *MaybeMaxStrideVal;
+
   // It's not vectorizable if the distance is smaller than the minimum distance
   // needed for a vectroized/unrolled version. Vectorizing one iteration in
   // front needs MaxStride. Vectorizing the last iteration needs TypeByteSize.
@@ -2364,7 +2421,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
   // We know that Dist is positive, but it may not be constant. Use the signed
   // minimum for computations below, as this ensures we compute the closest
   // possible dependence distance.
-  uint64_t MinDistanceNeeded = MaxStride * (MinNumIter - 1) + TypeByteSize;
+  uint64_t MinDistanceNeeded =
+      MaxStrideVal * (MinNumIter - 1) + CommonTypeSizeInBytes;
   if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) {
     if (!ConstDist) {
       // For non-constant distances, we checked the lower bound of the
@@ -2392,14 +2450,17 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
 
   bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
   if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist &&
-      couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride))
+      isa<SCEVConstant>(*CommonStride) &&
+      couldPreventStoreLoadForward(
+          MinDistance, CommonTypeSizeInBytes,
+          cast<SCEVConstant>(CommonStride)->getAPInt().getSExtValue()))
     return Dependence::BackwardVectorizableButPreventsForwarding;
 
-  uint64_t MaxVF = MinDepDistBytes / MaxStride;
+  uint64_t MaxVF = MinDepDistBytes / MaxStrideVal;
   LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
                     << " with max VF = " << MaxVF << '\n');
 
-  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
+  uint64_t MaxVFInBits = MaxVF * CommonTypeSizeInBytes * 8;
   if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {
     // For non-constant distances, we checked the lower bound of the dependence
     // distance and the distance may be larger at runtime (and safe for
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/dependences-i128-inductions.ll b/llvm/test/Analysis/LoopAccessAnalysis/dependences-i128-inductions.ll
index 2df451d5df738..076fbde61bb50 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/dependences-i128-inductions.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/dependences-i128-inductions.ll
@@ -117,13 +117,8 @@ exit:
 define void @forward_i128_step_63bit_plus_one(ptr %A, i128 %n) {
 ; CHECK-LABEL: 'forward_i128_step_63bit_plus_one'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Memory dependences are safe
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        IndirectUnsafe:
-; CHECK-NEXT:            %l = load i32, ptr %gep.A.1, align 4 ->
-; CHECK-NEXT:            store i32 %l, ptr %gep.A, align 4
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides.ll
index ee7aa504d3a84..bf8e1837e2ade 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides.ll
@@ -4,13 +4,8 @@
 define void @known_safe(ptr %p, i8 %a) {
 ; CHECK-LABEL: 'known_safe'
 ; CHECK-NEXT:    header:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Memory dependences are safe
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        IndirectUnsafe:
-; CHECK-NEXT:            %ld = load i64, ptr %gep.ld, align 4 ->
-; CHECK-NEXT:            store i64 %add, ptr %gep.st, align 4
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-EMPTY:
@@ -47,13 +42,8 @@ exit:
 define void @known_safe_byte_geps(ptr %p, i8 %a) {
 ; CHECK-LABEL: 'known_safe_byte_geps'
 ; CHECK-NEXT:    header:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Memory dependences are safe
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        IndirectUnsafe:
-; CHECK-NEXT:            %ld = load i64, ptr %gep.ld, align 4 ->
-; CHECK-NEXT:            store i64 %add, ptr %gep.st, align 4
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-EMPTY:
@@ -92,15 +82,21 @@ exit:
 define void @safe_if_non_zero(ptr %p, i8 %a) {
 ; CHECK-LABEL: 'safe_if_non_zero'
 ; CHECK-NEXT:    header:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        IndirectUnsafe:
-; CHECK-NEXT:            %ld = load i64, ptr %gep.ld, align 4 ->
-; CHECK-NEXT:            store i64 %add, ptr %gep.st, align 4
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group GRP0:
+; CHECK-NEXT:          %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+; CHECK-NEXT:        Against group GRP1:
+; CHECK-NEXT:          %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
 ; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group GRP0:
+; CHECK-NEXT:          (Low: (((1024 * (zext i8 %a to i64))<nuw><nsw> + %p) umin ((2040 * (zext i8 %a to i64))<nuw><nsw> + %p)) High: (8 + (((1024 * (zext i8 %a to i64))<nuw><nsw> + %p) umax ((2040 * (zext i8 %a to i64))<nuw><nsw> + %p))))
+; CHECK-NEXT:            Member: {((1024 * (zext i8 %a to i64))<nuw><nsw> + %p),+,(8 * (zext i8 %a to i64))<nuw><nsw>}<nw><%header>
+; CHECK-NEXT:        Group GRP1:
+; CHECK-NEXT:          (Low: (((1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umin %p) High: (8 + (((1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umax %p)))
+; CHECK-NEXT:            Member: {%p,+,(8 * (zext i8 %a to i64))<nuw><nsw>}<nuw><%header>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -135,15 +131,21 @@ exit:
 define void @safe_if_non_zero_byte_gep(ptr %p, i8 %a) {
 ; CHECK-LABEL: 'safe_if_non_zero_byte_gep'
 ; CHECK-NEXT:    header:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        IndirectUnsafe:
-; CHECK-NEXT:            %ld = load i64, ptr %gep.ld, align 4 ->
-; CHECK-NEXT:            store i64 %add, ptr %gep.st, align 4
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group GRP0:
+; CHECK-NEXT:          %gep.st = getelementptr inbounds i8, ptr %p.out, i64 %idx
+; CHECK-NEXT:        Against group GRP1:
+; CHECK-NEXT:          %gep.ld = getelementptr inbounds i8, ptr %p, i64 %idx
 ; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group GRP0:
+; CHECK-NEXT:          (Low: (((1024 * (zext i8 %a to i64))<nuw><nsw> + %p) umin ((2040 * (zext i8 %a to i64))<nuw><nsw> + %p)) High: (8 + (((1024 * (zext i8 %a to i64))<nuw><nsw> + %p) umax ((2040 * (zext i8 %a to i64))<nuw><nsw> + %p))))
+; CHECK-NEXT:            Member: {((1024 * (zext i8 %a to i64))<nuw><nsw> + %p),+,(8 * (zext i8 %a to i64))<nuw><nsw>}<%header>
+; CHECK-NEXT:        Group GRP1:
+; CHECK-NEXT:          (Low: (((1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umin %p) High: (8 + (((1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umax %p)))
+; CHECK-NEXT:            Member: {%p,+,(8 * (zext i8 %a to i64))<nuw><nsw>}<%header>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -224,15 +226,21 @@ exit:
 define void @offset_dep_check_sufficient(ptr %p, i8 %a, i64 %offset) {
 ; CHECK-LABEL: 'offset_dep_check_sufficient'
 ; CHECK-NEXT:    header:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        IndirectUnsafe:
-; CHECK-NEXT:            %ld = load i64, ptr %gep.ld, align 4 ->
-; CHECK-NEXT:            store i64 %add, ptr %gep.st, align 4
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group GRP0:
+; CHECK-NEXT:          %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+; CHECK-NEXT:        Against group GRP1:
+; CHECK-NEXT:          %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
 ; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group GRP0:
+; CHECK-NEXT:          (Low: (((8 * %offset) + %p) umin (1016 + (8 * %offset) + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p)) High: (8 + (((8 * %offset) + %p) umax (1016 + (8 * %offset) + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p))))
+; CHECK-NEXT:            Member: {((8 * %offset) + %p),+,(8 + (8 * (zext i8 %a to i64))<nuw><nsw>)<nuw><nsw>}<nw><%header>
+; CHECK-NEXT:        Group GRP1:
+; CHECK-NEXT:          (Low: ((1016 + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umin %p) High: (8 + ((1016 + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umax %p)))
+; CHECK-NEXT:            Member: {%p,+,(8 + (8 * (zext i8 %a to i64))<nuw><nsw>)<nuw><nsw>}<nuw><%header>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -315,15 +323,21 @@ exit:
 define void @needs_non_zero_stride_and_distance_checks(ptr %p, i8 %a, i64 %offset) {
 ; CHECK-LABEL: 'needs_non_zero_stride_and_distance_checks'
 ; CHECK-NEXT:    header:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Unsafe indirect dependence.
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        IndirectUnsafe:
-; CHECK-NEXT:            %ld = load i64, ptr %gep.ld, align 4 ->
-; CHECK-NEXT:            store i64 %add, ptr %gep.st, align 4
-; CHECK-EMPTY:
 ; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group GRP0:
+; CHECK-NEXT:          %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+; CHECK-NEXT:        Against group GRP1:
+; CHECK-NEXT:          %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
 ; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group GRP0:
+; CHECK-NEXT:          (Low: (((8 * %offset) + %p) umin ((8 * %offset) + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p)) High: (8 + (((8 * %offset) + %p) umax ((8 * %offset) + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p))))
+; CHECK-NEXT:            Member: {((8 * %offset) + %p),+,(8 * (zext i8 %a to i64))<nuw><nsw>}<nw><%header>
+; CHECK-NEXT:        Group GRP1:
+; CHECK-NEXT:          (Low: (((1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umin %p) High: (8 + (((1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umax %p)))
+; CHECK-NEXT:            Member: {%p,+,(8 * (zext i8 %a to i64))<nuw><nsw>}<nuw><%header>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:



More information about the llvm-branch-commits mailing list