[llvm] [LAA] Use MaxStride instead of CommonStride to calculate MaxVF (PR #98142)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 14 07:34:21 PDT 2024
================
@@ -0,0 +1,156 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt --disable-output -mtriple=x86_64 --passes="print<access-info>" %s 2>&1 | FileCheck %s
+
+@a = dso_local local_unnamed_addr global [65536 x float] zeroinitializer, align 16
+
+; Generated from the following C code:
+; #define LEN 256 * 256
+; float a[LEN];
+;
+; void different_strides() {
+; for (int i = 0; i < LEN - 1024 - 255; i++) {
+; #pragma clang loop interleave(disable)
+; #pragma clang loop unroll(disable)
+; for (int j = 0; j < 256; j++)
+; a[i + j + 1024] += a[j * 4 + i];
+; }
+; }
+; The load and store have different strides (16 and 4 bytes respectively), but the store
+; is always at a safe positive distance away from the load, thus BackwardVectorizable.
+define dso_local void @different_strides_backward_vectorizable() local_unnamed_addr {
+; CHECK-LABEL: 'different_strides_backward_vectorizable'
+; CHECK-NEXT: inner.body:
+; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 2048 bits
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: BackwardVectorizable:
+; CHECK-NEXT: %3 = load float, ptr %arrayidx, align 4 ->
+; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Forward:
+; CHECK-NEXT: %5 = load float, ptr %arrayidx8, align 4 ->
+; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+; CHECK-NEXT: outer.header:
+; CHECK-NEXT: Report: loop is not the innermost loop
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %outer.header
+
+outer.header:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %outer.latch ]
+ %0 = add nuw nsw i64 %i, 1024
+ br label %inner.body
+
+inner.body:
+ %j = phi i64 [ 0, %outer.header ], [ %j.next, %inner.body ]
+ %1 = shl nuw nsw i64 %j, 2
+ %2 = add nuw nsw i64 %1, %i
+ %arrayidx = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %2
+ %3 = load float, ptr %arrayidx, align 4
+ %4 = add nuw nsw i64 %0, %j
+ %arrayidx8 = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %4
+ %5 = load float, ptr %arrayidx8, align 4
+ %add9 = fadd fast float %5, %3
+ store float %add9, ptr %arrayidx8, align 4
+ %j.next = add nuw nsw i64 %j, 1
+ %exitcond.not = icmp eq i64 %j.next, 256
+ br i1 %exitcond.not, label %outer.latch, label %inner.body
+
+outer.latch:
+ %i.next = add nuw nsw i64 %i, 1
+ %outerexitcond.not = icmp eq i64 %i.next, 64257
+ br i1 %outerexitcond.not, label %exit, label %outer.header
+
+exit:
+ ret void
+}
+
+
+; Generated from the following C code:
+; void different_stride_and_not_vectorizable(){
+; for(int i = 0; i < LEN2; i++){
+; for(int j = 0 ; j < LEN; j++){
+; a[i + j + LEN] += a[i + 4*j];
+; }
+; }
+; }
+; The load and store have different strides, but the store and load are not at a
+; safe distance away from each other, thus not safe for vectorization.
+define dso_local void @different_stride_and_not_vectorizable() local_unnamed_addr {
----------------
fhahn wrote:
```suggestion
define void @different_stride_and_not_vectorizable() {
```
https://github.com/llvm/llvm-project/pull/98142
More information about the llvm-commits
mailing list