[llvm] [LAA] Fix incorrect dependency classification. (PR #70819)

Alexandros Lamprineas via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 5 07:24:15 PST 2023


https://github.com/labrinea updated https://github.com/llvm/llvm-project/pull/70819

>From ed06fea4be5c4504943fb9ebf993f31b05c4c55c Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas at arm.com>
Date: Tue, 31 Oct 2023 15:23:40 +0000
Subject: [PATCH] [LAA] Fix incorrect dependency classification.

As shown in #70473, the following loop was not considered safe to vectorize.
When determining the memory access dependencies in a loop which has negative
iteration step, we invert the source and sink of the dependence. Perhaps we
should just invert the operands to getMinusSCEV(). This way the dependency is
not regarded to be true, since the users of the `IsWrite` variables, which
correspond to each of the memory accesses, rely on program order and therefore
should not be swapped.

void vectorizable_Read_Write(int *A) {
  for (unsigned i = 1022; i >= 0; i--)
    A[i+1] = A[i] + 1;
}
---
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      |  9 +--
 .../forward-negative-step.ll                  | 13 ++--
 .../LoopAccessAnalysis/print-order.ll         | 65 +++++++++++++++++++
 3 files changed, 73 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/Analysis/LoopAccessAnalysis/print-order.ll

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 91f5eab03b032..a8dbb66c4a9f0 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1916,15 +1916,12 @@ getDependenceDistanceStrideAndSize(
   const SCEV *Src = PSE.getSCEV(APtr);
   const SCEV *Sink = PSE.getSCEV(BPtr);
 
-  // If the induction step is negative we have to invert source and sink of
-  // the dependence.
+  // If the induction step is negative we have to invert source and sink of the
+  // dependence when measuring the distance between them. We should not swap
+  // AIsWrite with BIsWrite, as their uses expect them in program order.
   if (StrideAPtr < 0) {
-    std::swap(APtr, BPtr);
-    std::swap(ATy, BTy);
     std::swap(Src, Sink);
-    std::swap(AIsWrite, BIsWrite);
     std::swap(AInst, BInst);
-    std::swap(StrideAPtr, StrideBPtr);
   }
 
   const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forward-negative-step.ll b/llvm/test/Analysis/LoopAccessAnalysis/forward-negative-step.ll
index 35e2109ab7447..46e81cd74ab31 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/forward-negative-step.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/forward-negative-step.ll
@@ -3,8 +3,6 @@
 
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 
-; FIXME: This should be vectorizable
-
 ; void vectorizable_Read_Write(int *A) {
 ;  for (unsigned i = 1022; i >= 0; i--)
 ;    A[i+1] = A[i] + 1;
@@ -13,10 +11,9 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 define void @vectorizable_Read_Write(ptr nocapture %A) {
 ; CHECK-LABEL: 'vectorizable_Read_Write'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Forward loop carried data dependence that prevents store-to-load forwarding.
+; CHECK-NEXT:      Memory dependences are safe
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        ForwardButPreventsForwarding:
+; CHECK-NEXT:        Forward:
 ; CHECK-NEXT:            %l = load i32, ptr %gep.A, align 4 ->
 ; CHECK-NEXT:            store i32 %add, ptr %gep.A.plus.1, align 4
 ; CHECK-EMPTY:
@@ -47,13 +44,13 @@ exit:
   ret void
 }
 
-; FIXME: There's a forward dependency that prevents forwarding here.
 define void @neg_step_ForwardButPreventsForwarding(ptr nocapture %A, ptr noalias %B) {
 ; CHECK-LABEL: 'neg_step_ForwardButPreventsForwarding'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Memory dependences are safe
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Forward loop carried data dependence that prevents store-to-load forwarding.
 ; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Forward:
+; CHECK-NEXT:        ForwardButPreventsForwarding:
 ; CHECK-NEXT:            store i32 0, ptr %gep.A, align 4 ->
 ; CHECK-NEXT:            %l = load i32, ptr %gep.A.plus.1, align 4
 ; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll b/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
new file mode 100644
index 0000000000000..65f94a7d8fdb4
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
@@ -0,0 +1,65 @@
+; REQUIRES: asserts
+; RUN: opt -passes='print<access-info>' -debug-only=loop-accesses -disable-output < %s 2>&1 | FileCheck %s
+
+; void negative_step(int *A) {
+;  for (int i = 1022; i >= 0; i--)
+;    A[i+1] = A[i] + 1;
+; }
+
+; CHECK: LAA: Found a loop in negative_step: loop
+; CHECK: LAA: Checking memory dependencies
+; CHECK-NEXT: LAA: Src Scev: {(4092 + %A),+,-4}<nw><%loop>Sink Scev: {(4088 + %A)<nuw>,+,-4}<nw><%loop>(Induction step: -1)
+; CHECK-NEXT: LAA: Distance for   store i32 %add, ptr %gep.A.plus.1, align 4 to   %l = load i32, ptr %gep.A, align 4: -4
+; CHECK-NEXT: LAA: Dependence is negative
+
+define void @negative_step(ptr nocapture %A) {
+entry:
+  %A.plus.1 = getelementptr i32, ptr %A, i64 1
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 1022, %entry ], [ %iv.next, %loop ]
+  %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
+  %l = load i32, ptr %gep.A, align 4
+  %add = add nsw i32 %l, 1
+  %gep.A.plus.1 = getelementptr i32, ptr %A.plus.1, i64 %iv
+  store i32 %add, ptr %gep.A.plus.1, align 4
+  %iv.next = add nsw i64 %iv, -1
+  %cmp.not = icmp eq i64 %iv, 0
+  br i1 %cmp.not, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; void positive_step(int *A) {
+;  for (int i = 1; i < 1024; i++)
+;    A[i-1] = A[i] + 1;
+; }
+
+; CHECK: LAA: Found a loop in positive_step: loop
+; CHECK: LAA: Checking memory dependencies
+; CHECK-NEXT: LAA: Src Scev: {(4 + %A)<nuw>,+,4}<nuw><%loop>Sink Scev: {%A,+,4}<nw><%loop>(Induction step: 1)
+; CHECK-NEXT: LAA: Distance for   %l = load i32, ptr %gep.A, align 4 to   store i32 %add, ptr %gep.A.minus.1, align 4: -4
+; CHECK-NEXT: LAA: Dependence is negative
+
+define void @positive_step(ptr nocapture %A) {
+entry:
+  %A.minus.1 = getelementptr i32, ptr %A, i64 -1
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ]
+  %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
+  %l = load i32, ptr %gep.A, align 4
+  %add = add nsw i32 %l, 1
+  %gep.A.minus.1 = getelementptr i32, ptr %A.minus.1, i64 %iv
+  store i32 %add, ptr %gep.A.minus.1, align 4
+  %iv.next = add nsw i64 %iv, 1
+  %cmp.not = icmp eq i64 %iv, 1024
+  br i1 %cmp.not, label %exit, label %loop
+
+exit:
+  ret void
+}
+



More information about the llvm-commits mailing list