[llvm-branch-commits] [llvm] [LAA] Support more cases with non-constant strided accesses (PR #187406)
Andrei Elovikov via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Mar 18 16:58:43 PDT 2026
https://github.com/eas created https://github.com/llvm/llvm-project/pull/187406
Namely, allow vectorization of
```c++
for (int i = 0; i < N; i += stride)
a[i] = a[offset + i];
```
...if `stride` is statically known to be positive even if it's non-constant.
>From ae04200ad6226f9ed881a4aa4422f1b02a76a279 Mon Sep 17 00:00:00 2001
From: Andrei Elovikov <andrei.elovikov at sifive.com>
Date: Wed, 18 Mar 2026 12:01:55 -0700
Subject: [PATCH 1/2] Add a test
---
.../non-constant-strides.ll | 526 ++++++++++++++++++
1 file changed, 526 insertions(+)
create mode 100644 llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides.ll
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides.ll
new file mode 100644
index 0000000000000..ee7aa504d3a84
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides.ll
@@ -0,0 +1,526 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes='print<access-info>' -disable-output < %s -enable-mem-access-versioning=false 2>&1 | FileCheck %s
+
+define void @known_safe(ptr %p, i8 %a) {
+; CHECK-LABEL: 'known_safe'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: %ld = load i64, ptr %gep.ld, align 4 ->
+; CHECK-NEXT: store i64 %add, ptr %gep.st, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ %a.zext = zext i8 %a to i64
+ %stride = add i64 %a.zext, 1
+ %offset = mul i64 %stride, 128
+ %p.out = getelementptr i64, ptr %p, i64 %offset
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = mul nsw nuw i64 %iv, %stride
+
+ %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+ %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+ %ld = load i64, ptr %gep.ld
+ %add = add i64 %ld, 1
+ store i64 %add, ptr %gep.st
+
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+define void @known_safe_byte_geps(ptr %p, i8 %a) {
+; CHECK-LABEL: 'known_safe_byte_geps'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: %ld = load i64, ptr %gep.ld, align 4 ->
+; CHECK-NEXT: store i64 %add, ptr %gep.st, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ %a.zext = zext i8 %a to i64
+ %stride.elts = add i64 %a.zext, 1
+ %stride = mul i64 %stride.elts, 8
+ %offset = mul i64 %stride, 128
+ %p.out = getelementptr i8, ptr %p, i64 %offset
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = mul nsw nuw i64 %iv, %stride
+
+ %gep.ld = getelementptr inbounds i8, ptr %p, i64 %idx
+ %gep.st = getelementptr inbounds i8, ptr %p.out, i64 %idx
+ %ld = load i64, ptr %gep.ld
+ %add = add i64 %ld, 1
+ store i64 %add, ptr %gep.st
+
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+; `%a u> 0` would be enough to prove safety.
+define void @safe_if_non_zero(ptr %p, i8 %a) {
+; CHECK-LABEL: 'safe_if_non_zero'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: %ld = load i64, ptr %gep.ld, align 4 ->
+; CHECK-NEXT: store i64 %add, ptr %gep.st, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ %stride = zext i8 %a to i64
+ %offset = mul i64 %stride, 128
+ %p.out = getelementptr i64, ptr %p, i64 %offset
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = mul nsw nuw i64 %iv, %stride
+
+ %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+ %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+ %ld = load i64, ptr %gep.ld
+ %add = add i64 %ld, 1
+ store i64 %add, ptr %gep.st
+
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+; `%a u> 0` would be enough to prove safety.
+define void @safe_if_non_zero_byte_gep(ptr %p, i8 %a) {
+; CHECK-LABEL: 'safe_if_non_zero_byte_gep'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: %ld = load i64, ptr %gep.ld, align 4 ->
+; CHECK-NEXT: store i64 %add, ptr %gep.st, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ %stride.elts = zext i8 %a to i64
+ %stride = mul i64 %stride.elts, 8
+ %offset = mul i64 %stride, 128
+ %p.out = getelementptr i8, ptr %p, i64 %offset
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = mul nsw nuw i64 %iv, %stride
+
+ %gep.ld = getelementptr inbounds i8, ptr %p, i64 %idx
+ %gep.st = getelementptr inbounds i8, ptr %p.out, i64 %idx
+ %ld = load i64, ptr %gep.ld
+ %add = add i64 %ld, 1
+ store i64 %add, ptr %gep.st
+
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+; `%a u>= 8` is necessary to prove safety.
+define void @known_non_negative_byte_gep(ptr %p, i8 %a) {
+; CHECK-LABEL: 'known_non_negative_byte_gep'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: %ld = load i64, ptr %gep.ld, align 4 ->
+; CHECK-NEXT: store i64 %add, ptr %gep.st, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ %stride = zext i8 %a to i64
+ %offset = mul i64 %stride, 128
+ %p.out = getelementptr i8, ptr %p, i64 %offset
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = mul nsw nuw i64 %iv, %stride
+
+ %gep.ld = getelementptr inbounds i8, ptr %p, i64 %idx
+ %gep.st = getelementptr inbounds i8, ptr %p.out, i64 %idx
+ %ld = load i64, ptr %gep.ld
+ %add = add i64 %ld, 1
+ store i64 %add, ptr %gep.st
+
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+; Stride is safe (i.e., `%stride u>= type-size` is known statically), only the
+; `%offset` needs to be checked to ensure in/out pointers have enough distance
+; between them.
+define void @offset_dep_check_sufficient(ptr %p, i8 %a, i64 %offset) {
+; CHECK-LABEL: 'offset_dep_check_sufficient'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: %ld = load i64, ptr %gep.ld, align 4 ->
+; CHECK-NEXT: store i64 %add, ptr %gep.st, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ %a.zext = zext i8 %a to i64
+ %stride = add i64 %a.zext, 1
+ %p.out = getelementptr i64, ptr %p, i64 %offset
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = mul nsw nuw i64 %iv, %stride
+
+ %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+ %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+ %ld = load i64, ptr %gep.ld
+ %add = add i64 %ld, 1
+ store i64 %add, ptr %gep.st
+
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+; Same as `@offset_dep_check_sufficient` but both input/output pointers are arguments instead of
+; common base plus offset.
+define void @distance_dep_check_sufficient(ptr %p, ptr %p.out, i8 %a) {
+; CHECK-LABEL: 'distance_dep_check_sufficient'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group GRP0:
+; CHECK-NEXT: %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+; CHECK-NEXT: Against group GRP1:
+; CHECK-NEXT: %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group GRP0:
+; CHECK-NEXT: (Low: ((1016 + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p.out) umin %p.out) High: (8 + ((1016 + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p.out) umax %p.out)))
+; CHECK-NEXT: Member: {%p.out,+,(8 + (8 * (zext i8 %a to i64))<nuw><nsw>)<nuw><nsw>}<nuw><%header>
+; CHECK-NEXT: Group GRP1:
+; CHECK-NEXT: (Low: ((1016 + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umin %p) High: (8 + ((1016 + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umax %p)))
+; CHECK-NEXT: Member: {%p,+,(8 + (8 * (zext i8 %a to i64))<nuw><nsw>)<nuw><nsw>}<nuw><%header>
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ %a.zext = zext i8 %a to i64
+ %stride = add i64 %a.zext, 1
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = mul nsw nuw i64 %iv, %stride
+
+ %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+ %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+ %ld = load i64, ptr %gep.ld
+ %add = add i64 %ld, 1
+ store i64 %add, ptr %gep.st
+
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+define void @needs_non_zero_stride_and_distance_checks(ptr %p, i8 %a, i64 %offset) {
+; CHECK-LABEL: 'needs_non_zero_stride_and_distance_checks'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: %ld = load i64, ptr %gep.ld, align 4 ->
+; CHECK-NEXT: store i64 %add, ptr %gep.st, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ %stride = zext i8 %a to i64
+ %p.out = getelementptr i64, ptr %p, i64 %offset
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = mul nsw nuw i64 %iv, %stride
+
+ %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+ %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+ %ld = load i64, ptr %gep.ld
+ %add = add i64 %ld, 1
+ store i64 %add, ptr %gep.st
+
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+; Both distance (offset) and the stride (non-zero, no-overflow) need to be
+; checked at run time.
+define void @needs_all(ptr %p, i64 %stride, i64 %offset) {
+; CHECK-LABEL: 'needs_all'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: IndirectUnsafe:
+; CHECK-NEXT: %ld = load i64, ptr %gep.ld, align 4 ->
+; CHECK-NEXT: store i64 %add, ptr %gep.st, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ %p.out = getelementptr i64, ptr %p, i64 %offset
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = mul nsw nuw i64 %iv, %stride
+
+ %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+ %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+ %ld = load i64, ptr %gep.ld
+ %add = add i64 %ld, 1
+ store i64 %add, ptr %gep.st
+
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+; Same as `@needs_all` but both input/output pointers are arguments instead of
+; common base plus offset.
+define void @needs_all_distinct_ptrs(ptr %p, ptr %p.out, i64 %stride) {
+; CHECK-LABEL: 'needs_all_distinct_ptrs'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group GRP0:
+; CHECK-NEXT: %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+; CHECK-NEXT: Against group GRP1:
+; CHECK-NEXT: %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group GRP0:
+; CHECK-NEXT: (Low: (((1016 * %stride) + %p.out) umin %p.out) High: (8 + (((1016 * %stride) + %p.out) umax %p.out)))
+; CHECK-NEXT: Member: {%p.out,+,(8 * %stride)}<%header>
+; CHECK-NEXT: Group GRP1:
+; CHECK-NEXT: (Low: (((1016 * %stride) + %p) umin %p) High: (8 + (((1016 * %stride) + %p) umax %p)))
+; CHECK-NEXT: Member: {%p,+,(8 * %stride)}<%header>
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = mul nsw nuw i64 %iv, %stride
+
+ %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
+ %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+ %ld = load i64, ptr %gep.ld
+ %add = add i64 %ld, 1
+ store i64 %add, ptr %gep.st
+
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+; Safe to vectorize, only last store is visible outside the loop and `scatter`
+; is ordered even if `%stride == 0` at run time. However, might be better to be
+; conservative here to avoid miscompiling the next test.
+define void @waw_no_mask(ptr %p, i64 %stride) {
+; CHECK-LABEL: 'waw_no_mask'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %header ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = mul nsw nuw i64 %iv, %stride
+
+ %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+ store i64 %iv, ptr %gep
+ store i64 %iv.next, ptr %gep
+
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
+
+; Unsafe to vectorize without `%stride != 0` RT check.
+; FIXME: https://github.com/llvm/llvm-project/issues/187402
+define void @waw_mask(ptr %p, i64 %stride, i64 %n0, i64 %n1) {
+; CHECK-LABEL: 'waw_mask'
+; CHECK-NEXT: header:
+; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %header
+
+header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+ %iv.next = add nsw i64 %iv, 1
+ %idx = mul nsw nuw i64 %iv, %stride
+
+ %c0 = icmp sle i64 %iv, %n0
+ %c1 = icmp sle i64 %iv, %n1
+
+ %gep = getelementptr inbounds i64, ptr %p, i64 %idx
+ br i1 %c0, label %store0, label %merge
+
+store0:
+ store i64 %iv, ptr %gep
+ br label %merge
+
+merge:
+ br i1 %c1, label %store1, label %latch
+
+store1:
+ store i64 %iv.next, ptr %gep
+ br label %latch
+
+latch:
+ %exitcond = icmp slt i64 %iv.next, 128
+ br i1 %exitcond, label %header, label %exit
+
+exit:
+ ret void
+}
>From f21d290f84db317405de5e52acb4a1b187904ee5 Mon Sep 17 00:00:00 2001
From: Andrei Elovikov <andrei.elovikov at sifive.com>
Date: Wed, 18 Mar 2026 12:44:15 -0700
Subject: [PATCH 2/2] [LAA] Support more cases with non-constant strided
accesses
Namely, allow vectorization of
```c++
for (int i = 0; i < N; i += stride)
a[i] = a[offset + i];
```
...if `stride` is statically known to be positive even if it's non-constant.
---
.../llvm/Analysis/LoopAccessAnalysis.h | 15 +-
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 181 ++++++++++++------
.../dependences-i128-inductions.ll | 7 +-
.../non-constant-strides.ll | 86 +++++----
4 files changed, 181 insertions(+), 108 deletions(-)
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index ac0b454d33737..92213f93db8c7 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -410,8 +410,9 @@ class MemoryDepChecker {
/// Strides here are scaled; i.e. in bytes, taking the size of the
/// underlying type into account.
- uint64_t MaxStride;
- std::optional<uint64_t> CommonStride;
+ const SCEV *MaxAbsStrideInBytes;
+ /// If `nullptr` then strides (might) differ.
+ const SCEV *CommonStrideInBytes;
/// TypeByteSize is either the common store size of both accesses, or 0 when
/// store sizes mismatch.
@@ -420,12 +421,14 @@ class MemoryDepChecker {
bool AIsWrite;
bool BIsWrite;
- DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t MaxStride,
- std::optional<uint64_t> CommonStride,
+ DepDistanceStrideAndSizeInfo(const SCEV *Dist,
+ const SCEV *MaxAbsStrideInBytes,
+ const SCEV *CommonStride,
uint64_t TypeByteSize, bool AIsWrite,
bool BIsWrite)
- : Dist(Dist), MaxStride(MaxStride), CommonStride(CommonStride),
- TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
+ : Dist(Dist), MaxAbsStrideInBytes(MaxAbsStrideInBytes),
+ CommonStrideInBytes(CommonStride), TypeByteSize(TypeByteSize),
+ AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
};
/// Get the dependence distance, strides, type size and whether it is a write
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index f4fb45bfeae5f..c3aaa020745db 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1979,7 +1979,7 @@ void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
/// }
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
const SCEV &MaxBTC, const SCEV &Dist,
- uint64_t MaxStride) {
+ const SCEV &MaxStride) {
// If we can prove that
// (**) |Dist| > MaxBTC * Step
@@ -1998,13 +1998,15 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
// will be executed only if LoopCount >= VF, proving distance >= LoopCount
// also guarantees that distance >= VF.
//
- const SCEV *Step = SE.getConstant(MaxBTC.getType(), MaxStride);
- const SCEV *Product = SE.getMulExpr(&MaxBTC, Step);
+ Type *ProductTy = SE.getWiderType(MaxBTC.getType(), MaxStride.getType());
+ const SCEV *Product =
+ SE.getMulExpr(SE.getNoopOrZeroExtend(&MaxBTC, ProductTy),
+ SE.getTruncateOrZeroExtend(&MaxStride, ProductTy));
const SCEV *CastedDist = &Dist;
const SCEV *CastedProduct = Product;
uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
- uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
+ uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(ProductTy);
// The dependence distance can be positive/negative, so we sign extend Dist;
// The multiplication of the absolute stride in bytes and the
@@ -2012,7 +2014,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
if (DistTypeSizeBits > ProductTypeSizeBits)
CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
else
- CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
+ CastedDist = SE.getNoopOrSignExtend(&Dist, ProductTy);
// Is Dist - (MaxBTC * Step) > 0 ?
// (If so, then we have proven (**) because |Dist| >= Dist)
@@ -2027,19 +2029,28 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
return SE.isKnownPositive(Minus);
}
-/// Check the dependence for two accesses with the same stride \p Stride.
-/// \p Distance is the positive distance in bytes, and \p TypeByteSize is type
-/// size in bytes.
+/// Check the dependence for two accesses in the same direction with the same
+/// absolute stride \p Stride. \p Distance is the positive distance in bytes,
+/// and \p TypeByteSize is type size in bytes.
///
/// \returns true if they are independent.
-static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
- uint64_t TypeByteSize) {
- assert(Stride > 1 && "The stride must be greater than 1");
- assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
- assert(Distance > 0 && "The distance must be non-zero");
-
- // Skip if the distance is not multiple of type byte size.
- if (Distance % TypeByteSize)
+static bool areStridedAccessesIndependent(ScalarEvolution &SE,
+ const SCEV *Distance,
+ const SCEV *Stride,
+ const SCEV *TypeByteSize) {
+ assert(Stride && "Must be strided!");
+ assert(SE.isKnownNonNegative(Stride) && "Stride must be absolute!");
+ assert(SE.isKnownPositive(Distance) && "The distance must be non-zero");
+ assert(SE.isKnownPositive(TypeByteSize) &&
+ "The type size in byte must be non-zero");
+
+ // Check if guaranteed `Stride > 1`.
+ if (!SE.isKnownPositive(
+ SE.getAddExpr(Stride, SE.getMinusOne(Stride->getType()))))
+ return false;
+
+ // Skip if the distance is not guaranteed to be a multiple of type byte size.
+ if (!SE.getURemExpr(Distance, TypeByteSize)->isZero())
return false;
// No dependence if the distance is not multiple of the stride.
@@ -2058,7 +2069,11 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
// Two accesses in memory (distance is 4, stride is 3):
// | A[0] | | | A[3] | | | A[6] | | |
// | | | | | A[4] | | | A[7] | |
- return Distance % Stride;
+ Type *CommonTy = SE.getWiderType(Distance->getType(), Stride->getType());
+ Distance = SE.getNoopOrZeroExtend(Distance, CommonTy);
+ Stride = SE.getNoopOrZeroExtend(Stride, CommonTy);
+ auto *KnownRem = dyn_cast<SCEVConstant>(SE.getURemExpr(Distance, Stride));
+ return KnownRem && !KnownRem->isZero();
}
bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src,
@@ -2116,10 +2131,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
BPtr->getType()->getPointerAddressSpace())
return MemoryDepChecker::Dependence::Unknown;
- std::optional<int64_t> StrideAPtr = getPtrStride(
- PSE, ATy, APtr, InnermostLoop, *DT, SymbolicStrides, true, true);
- std::optional<int64_t> StrideBPtr = getPtrStride(
- PSE, BTy, BPtr, InnermostLoop, *DT, SymbolicStrides, true, true);
+ const SCEV *StrideAPtr = getPtrStrideScev(PSE, ATy, APtr, InnermostLoop, *DT,
+ SymbolicStrides, true, true);
+ const SCEV *StrideBPtr = getPtrStrideScev(PSE, BTy, BPtr, InnermostLoop, *DT,
+ SymbolicStrides, true, true);
const SCEV *Src = PSE.getSCEV(APtr);
const SCEV *Sink = PSE.getSCEV(BPtr);
@@ -2127,7 +2142,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
// If the induction step is negative we have to invert source and sink of the
// dependence when measuring the distance between them. We should not swap
// AIsWrite with BIsWrite, as their uses expect them in program order.
- if (StrideAPtr && *StrideAPtr < 0) {
+
+ assert((!StrideAPtr || SE.isKnownNonPositive(StrideAPtr) ||
+ SE.isKnownNonNegative(StrideAPtr)) &&
+ "Did getPtrStrideScev's guarantees change?");
+
+ // TODO: Does this handling remain correct when the stride is zero at run
+ // time but not provably zero at compile time?
+ if (StrideAPtr && SE.isKnownNonPositive(StrideAPtr) &&
+ !StrideAPtr->isZero()) {
std::swap(Src, Sink);
std::swap(AInst, BInst);
std::swap(ATy, BTy);
@@ -2141,32 +2163,32 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
<< ": " << *Dist << "\n");
- // Need accesses with constant strides and the same direction for further
- // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
- // similar code or pointer arithmetic that could wrap in the address space.
+ // Need accesses with loop-invariant strides and the same direction for
+ // further dependence analysis. We don't want to vectorize "A[B[i]] += ..."
+ // and similar code or pointer arithmetic that could wrap in the address
+ // space.
// If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and
// not loop-invariant (stride will be 0 in that case), we cannot analyze the
// dependence further and also cannot generate runtime checks.
if (!StrideAPtr || !StrideBPtr) {
- LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
+ LLVM_DEBUG(dbgs() << "Non-strided pointer access\n");
return MemoryDepChecker::Dependence::IndirectUnsafe;
}
- int64_t StrideAPtrInt = *StrideAPtr;
- int64_t StrideBPtrInt = *StrideBPtr;
- LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt
- << " Sink induction step: " << StrideBPtrInt << "\n");
+ LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << *StrideAPtr
+ << " Sink induction step: " << *StrideBPtr << "\n");
// At least Src or Sink are loop invariant and the other is strided or
// invariant. We can generate a runtime check to disambiguate the accesses.
- if (!StrideAPtrInt || !StrideBPtrInt)
+ if (StrideAPtr->isZero() || StrideBPtr->isZero())
return MemoryDepChecker::Dependence::Unknown;
// Both Src and Sink have a constant stride, check if they are in the same
// direction.
- if ((StrideAPtrInt > 0) != (StrideBPtrInt > 0)) {
+ if (!SE.haveSameSign(StrideAPtr, StrideBPtr)) {
LLVM_DEBUG(
- dbgs() << "Pointer access with strides in different directions\n");
+ dbgs()
+ << "Pointer access with strides in potentially different directions\n");
return MemoryDepChecker::Dependence::Unknown;
}
@@ -2179,19 +2201,26 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
uint64_t BSz = DL.getTypeAllocSize(BTy);
uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0;
- uint64_t StrideAScaled = std::abs(StrideAPtrInt) * ASz;
- uint64_t StrideBScaled = std::abs(StrideBPtrInt) * BSz;
+ const SCEV *StrideAScaled =
+ SE.getMulExpr(SE.getAbsExpr(StrideAPtr, false),
+ SE.getConstant(StrideAPtr->getType(), ASz));
+ const SCEV *StrideBScaled =
+ SE.getMulExpr(SE.getAbsExpr(StrideBPtr, false),
+ SE.getConstant(StrideBPtr->getType(), ASz));
- uint64_t MaxStride = std::max(StrideAScaled, StrideBScaled);
+ const SCEV *MaxAbsStrideInBytes =
+ SE.getUMaxExpr(StrideAScaled, StrideBScaled);
- std::optional<uint64_t> CommonStride;
+ Type *I64Ty = Type::getInt64Ty(ATy->getContext());
+
+ const SCEV *CommonStride = nullptr;
if (StrideAScaled == StrideBScaled)
CommonStride = StrideAScaled;
// TODO: Historically, we didn't retry with runtime checks when (unscaled)
// strides were different but there is no inherent reason to.
if (!isa<SCEVConstant>(Dist))
- ShouldRetryWithRuntimeChecks |= StrideAPtrInt == StrideBPtrInt;
+ ShouldRetryWithRuntimeChecks |= StrideAPtr == StrideBPtr;
// If distance is a SCEVCouldNotCompute, return Unknown immediately.
if (isa<SCEVCouldNotCompute>(Dist)) {
@@ -2199,7 +2228,7 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
return Dependence::Unknown;
}
- return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,
+ return DepDistanceStrideAndSizeInfo(Dist, MaxAbsStrideInBytes, CommonStride,
TypeByteSize, AIsWrite, BIsWrite);
}
@@ -2232,9 +2261,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return std::get<Dependence::DepType>(Res);
}
- auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =
- std::get<DepDistanceStrideAndSizeInfo>(Res);
- bool HasSameSize = TypeByteSize > 0;
+ auto &[Dist, MaxStride, CommonStride, CommonTypeSizeInBytes, AIsWrite,
+ BIsWrite] = std::get<DepDistanceStrideAndSizeInfo>(Res);
ScalarEvolution &SE = *PSE.getSE();
auto &DL = InnermostLoop->getHeader()->getDataLayout();
@@ -2244,23 +2272,35 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// upper bound of the number of iterations), the accesses are independent, i.e.
// they are far enough apart that accesses won't access the same location
// across all loop iterations.
- if (HasSameSize &&
+ if (CommonTypeSizeInBytes &&
isSafeDependenceDistance(
- DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()), *Dist, MaxStride))
+ DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()), *Dist, *MaxStride))
return Dependence::NoDep;
- // The rest of this function relies on ConstDist being at most 64-bits, which
- // is checked earlier. Will assert if the calling code changes.
+ // The rest of this function relies on ConstDist being at most 64-bits.
+
const APInt *APDist = nullptr;
- uint64_t ConstDist =
- match(Dist, m_scev_APInt(APDist)) ? APDist->abs().getZExtValue() : 0;
+ uint64_t ConstDist = 0;
+
+  if (match(Dist, m_scev_APInt(APDist))) {
+    // Use tryZExtValue(): getZExtValue() returns a plain uint64_t and asserts
+    // when the value does not fit, so the std::optional below would always be
+    // engaged and the "too huge" bail-out could never trigger.
+    std::optional<uint64_t> MaybeZExt = APDist->abs().tryZExtValue();
+    if (!MaybeZExt) {
+      LLVM_DEBUG(dbgs() << "LAA: Distance is too huge.\n");
+      return Dependence::IndirectUnsafe;
+    }
+    ConstDist = *MaybeZExt;
+  }
+
+ Type *I64Ty = Type::getInt64Ty(SE.getContext());
// Attempt to prove strided accesses independent.
if (APDist) {
// If the distance between accesses and their strides are known constants,
// check whether the accesses interlace each other.
- if (ConstDist > 0 && CommonStride && CommonStride > 1 && HasSameSize &&
- areStridedAccessesIndependent(ConstDist, *CommonStride, TypeByteSize)) {
+ if (ConstDist > 0 && CommonStride && CommonTypeSizeInBytes &&
+ areStridedAccessesIndependent(
+ SE, SE.getConstant(I64Ty, ConstDist), CommonStride,
+ SE.getConstant(I64Ty, CommonTypeSizeInBytes))) {
LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
return Dependence::NoDep;
}
@@ -2274,9 +2314,13 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// Negative distances are not plausible dependencies.
if (SE.isKnownNonPositive(Dist)) {
if (SE.isKnownNonNegative(Dist)) {
- if (HasSameSize) {
- // Write to the same location with the same size.
- return Dependence::Forward;
+ if (CommonTypeSizeInBytes) {
+ if (SE.isKnownNonZero(CommonStride))
+ // Write to the same location with the same size.
+ return Dependence::Forward;
+ else
+ // Needs a RT check on the stride, not implemented yet.
+ return Dependence::IndirectUnsafe;
}
LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
"different type sizes\n");
@@ -2297,8 +2341,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
: Dependence::Unknown;
}
- if (!HasSameSize ||
- couldPreventStoreLoadForward(ConstDist, TypeByteSize)) {
+ if (!CommonTypeSizeInBytes ||
+ couldPreventStoreLoadForward(ConstDist, CommonTypeSizeInBytes)) {
LLVM_DEBUG(
dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
return Dependence::ForwardButPreventsForwarding;
@@ -2316,7 +2360,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
: Dependence::Unknown;
}
- if (!HasSameSize) {
+ if (!CommonTypeSizeInBytes) {
if (CheckCompletelyBeforeOrAfter())
return Dependence::NoDep;
LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
@@ -2331,6 +2375,19 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// The minimum number of iterations for a vectorized/unrolled version.
unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
+ if (!isa<SCEVConstant>(MaxStride)) {
+ LLVM_DEBUG(dbgs() << "LAA: Cannot analyze non-constant stride further\n");
+ return Dependence::Unknown;
+ }
+
+  // Use trySExtValue(): getSExtValue() returns a plain int64_t and asserts on
+  // overflow, which would make the !MaybeMaxStrideVal check below dead code.
+  std::optional<int64_t> MaybeMaxStrideVal =
+      cast<SCEVConstant>(MaxStride)->getAPInt().trySExtValue();
+  if (!MaybeMaxStrideVal) {
+    LLVM_DEBUG(dbgs() << "LAA: Cannot analyze huge constant stride further\n");
+    return Dependence::Unknown;
+  }
+ int64_t MaxStrideVal = *MaybeMaxStrideVal;
+
// It's not vectorizable if the distance is smaller than the minimum distance
// needed for a vectorized/unrolled version. Vectorizing one iteration in
// front needs MaxStride. Vectorizing the last iteration needs TypeByteSize.
@@ -2364,7 +2421,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// We know that Dist is positive, but it may not be constant. Use the signed
// minimum for computations below, as this ensures we compute the closest
// possible dependence distance.
- uint64_t MinDistanceNeeded = MaxStride * (MinNumIter - 1) + TypeByteSize;
+ uint64_t MinDistanceNeeded =
+ MaxStrideVal * (MinNumIter - 1) + CommonTypeSizeInBytes;
if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) {
if (!ConstDist) {
// For non-constant distances, we checked the lower bound of the
@@ -2392,14 +2450,17 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist &&
- couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride))
+ isa<SCEVConstant>(*CommonStride) &&
+ couldPreventStoreLoadForward(
+ MinDistance, CommonTypeSizeInBytes,
+ cast<SCEVConstant>(CommonStride)->getAPInt().getSExtValue()))
return Dependence::BackwardVectorizableButPreventsForwarding;
- uint64_t MaxVF = MinDepDistBytes / MaxStride;
+ uint64_t MaxVF = MinDepDistBytes / MaxStrideVal;
LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
<< " with max VF = " << MaxVF << '\n');
- uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
+ uint64_t MaxVFInBits = MaxVF * CommonTypeSizeInBytes * 8;
if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {
// For non-constant distances, we checked the lower bound of the dependence
// distance and the distance may be larger at runtime (and safe for
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/dependences-i128-inductions.ll b/llvm/test/Analysis/LoopAccessAnalysis/dependences-i128-inductions.ll
index 2df451d5df738..076fbde61bb50 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/dependences-i128-inductions.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/dependences-i128-inductions.ll
@@ -117,13 +117,8 @@ exit:
define void @forward_i128_step_63bit_plus_one(ptr %A, i128 %n) {
; CHECK-LABEL: 'forward_i128_step_63bit_plus_one'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: IndirectUnsafe:
-; CHECK-NEXT: %l = load i32, ptr %gep.A.1, align 4 ->
-; CHECK-NEXT: store i32 %l, ptr %gep.A, align 4
-; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides.ll
index ee7aa504d3a84..bf8e1837e2ade 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides.ll
@@ -4,13 +4,8 @@
define void @known_safe(ptr %p, i8 %a) {
; CHECK-LABEL: 'known_safe'
; CHECK-NEXT: header:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: IndirectUnsafe:
-; CHECK-NEXT: %ld = load i64, ptr %gep.ld, align 4 ->
-; CHECK-NEXT: store i64 %add, ptr %gep.st, align 4
-; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
@@ -47,13 +42,8 @@ exit:
define void @known_safe_byte_geps(ptr %p, i8 %a) {
; CHECK-LABEL: 'known_safe_byte_geps'
; CHECK-NEXT: header:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: IndirectUnsafe:
-; CHECK-NEXT: %ld = load i64, ptr %gep.ld, align 4 ->
-; CHECK-NEXT: store i64 %add, ptr %gep.st, align 4
-; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
@@ -92,15 +82,21 @@ exit:
define void @safe_if_non_zero(ptr %p, i8 %a) {
; CHECK-LABEL: 'safe_if_non_zero'
; CHECK-NEXT: header:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: IndirectUnsafe:
-; CHECK-NEXT: %ld = load i64, ptr %gep.ld, align 4 ->
-; CHECK-NEXT: store i64 %add, ptr %gep.st, align 4
-; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group GRP0:
+; CHECK-NEXT: %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+; CHECK-NEXT: Against group GRP1:
+; CHECK-NEXT: %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group GRP0:
+; CHECK-NEXT: (Low: (((1024 * (zext i8 %a to i64))<nuw><nsw> + %p) umin ((2040 * (zext i8 %a to i64))<nuw><nsw> + %p)) High: (8 + (((1024 * (zext i8 %a to i64))<nuw><nsw> + %p) umax ((2040 * (zext i8 %a to i64))<nuw><nsw> + %p))))
+; CHECK-NEXT: Member: {((1024 * (zext i8 %a to i64))<nuw><nsw> + %p),+,(8 * (zext i8 %a to i64))<nuw><nsw>}<nw><%header>
+; CHECK-NEXT: Group GRP1:
+; CHECK-NEXT: (Low: (((1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umin %p) High: (8 + (((1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umax %p)))
+; CHECK-NEXT: Member: {%p,+,(8 * (zext i8 %a to i64))<nuw><nsw>}<nuw><%header>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
@@ -135,15 +131,21 @@ exit:
define void @safe_if_non_zero_byte_gep(ptr %p, i8 %a) {
; CHECK-LABEL: 'safe_if_non_zero_byte_gep'
; CHECK-NEXT: header:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: IndirectUnsafe:
-; CHECK-NEXT: %ld = load i64, ptr %gep.ld, align 4 ->
-; CHECK-NEXT: store i64 %add, ptr %gep.st, align 4
-; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group GRP0:
+; CHECK-NEXT: %gep.st = getelementptr inbounds i8, ptr %p.out, i64 %idx
+; CHECK-NEXT: Against group GRP1:
+; CHECK-NEXT: %gep.ld = getelementptr inbounds i8, ptr %p, i64 %idx
; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group GRP0:
+; CHECK-NEXT: (Low: (((1024 * (zext i8 %a to i64))<nuw><nsw> + %p) umin ((2040 * (zext i8 %a to i64))<nuw><nsw> + %p)) High: (8 + (((1024 * (zext i8 %a to i64))<nuw><nsw> + %p) umax ((2040 * (zext i8 %a to i64))<nuw><nsw> + %p))))
+; CHECK-NEXT: Member: {((1024 * (zext i8 %a to i64))<nuw><nsw> + %p),+,(8 * (zext i8 %a to i64))<nuw><nsw>}<%header>
+; CHECK-NEXT: Group GRP1:
+; CHECK-NEXT: (Low: (((1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umin %p) High: (8 + (((1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umax %p)))
+; CHECK-NEXT: Member: {%p,+,(8 * (zext i8 %a to i64))<nuw><nsw>}<%header>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
@@ -224,15 +226,21 @@ exit:
define void @offset_dep_check_sufficient(ptr %p, i8 %a, i64 %offset) {
; CHECK-LABEL: 'offset_dep_check_sufficient'
; CHECK-NEXT: header:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: IndirectUnsafe:
-; CHECK-NEXT: %ld = load i64, ptr %gep.ld, align 4 ->
-; CHECK-NEXT: store i64 %add, ptr %gep.st, align 4
-; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group GRP0:
+; CHECK-NEXT: %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+; CHECK-NEXT: Against group GRP1:
+; CHECK-NEXT: %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group GRP0:
+; CHECK-NEXT: (Low: (((8 * %offset) + %p) umin (1016 + (8 * %offset) + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p)) High: (8 + (((8 * %offset) + %p) umax (1016 + (8 * %offset) + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p))))
+; CHECK-NEXT: Member: {((8 * %offset) + %p),+,(8 + (8 * (zext i8 %a to i64))<nuw><nsw>)<nuw><nsw>}<nw><%header>
+; CHECK-NEXT: Group GRP1:
+; CHECK-NEXT: (Low: ((1016 + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umin %p) High: (8 + ((1016 + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umax %p)))
+; CHECK-NEXT: Member: {%p,+,(8 + (8 * (zext i8 %a to i64))<nuw><nsw>)<nuw><nsw>}<nuw><%header>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
@@ -315,15 +323,21 @@ exit:
define void @needs_non_zero_stride_and_distance_checks(ptr %p, i8 %a, i64 %offset) {
; CHECK-LABEL: 'needs_non_zero_stride_and_distance_checks'
; CHECK-NEXT: header:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unsafe indirect dependence.
+; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: IndirectUnsafe:
-; CHECK-NEXT: %ld = load i64, ptr %gep.ld, align 4 ->
-; CHECK-NEXT: store i64 %add, ptr %gep.st, align 4
-; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group GRP0:
+; CHECK-NEXT: %gep.st = getelementptr inbounds i64, ptr %p.out, i64 %idx
+; CHECK-NEXT: Against group GRP1:
+; CHECK-NEXT: %gep.ld = getelementptr inbounds i64, ptr %p, i64 %idx
; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group GRP0:
+; CHECK-NEXT: (Low: (((8 * %offset) + %p) umin ((8 * %offset) + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p)) High: (8 + (((8 * %offset) + %p) umax ((8 * %offset) + (1016 * (zext i8 %a to i64))<nuw><nsw> + %p))))
+; CHECK-NEXT: Member: {((8 * %offset) + %p),+,(8 * (zext i8 %a to i64))<nuw><nsw>}<nw><%header>
+; CHECK-NEXT: Group GRP1:
+; CHECK-NEXT: (Low: (((1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umin %p) High: (8 + (((1016 * (zext i8 %a to i64))<nuw><nsw> + %p) umax %p)))
+; CHECK-NEXT: Member: {%p,+,(8 * (zext i8 %a to i64))<nuw><nsw>}<nuw><%header>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
More information about the llvm-branch-commits
mailing list