[llvm] LAA: check nusw on GEP in place of inbounds (PR #112223)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 21 11:45:25 PDT 2024


https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/112223

>From 8f4953c505a0a67eaa5b73e0c27f9dddf2b299c0 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Mon, 14 Oct 2024 16:39:26 +0100
Subject: [PATCH] LAA: check nusw on GEP in place of inbounds

With the introduction of the nusw flag in GEPNoWrapFlags, it should be
safe to weaken the check in LoopAccessAnalysis to just check the nusw
flag on the GEP, instead of inbounds.
---
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      |  8 ++--
 .../LoopAccessAnalysis/symbolic-stride.ll     | 45 +++++++++++++++++++
 2 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index d35bf6818d4379..163b22f16639a4 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1412,9 +1412,9 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
   // Look through the potentially overflowing instruction to try to prove
   // non-wrapping for the *specific* value of Ptr.
 
-  // The arithmetic implied by an inbounds GEP can't overflow.
+  // The arithmetic implied by an nusw GEP can't overflow.
   const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
-  if (!GEP || !GEP->isInBounds())
+  if (!GEP || !GEP->hasNoUnsignedSignedWrap())
     return false;
 
   // Make sure there is only one non-const index and analyze that.
@@ -1516,12 +1516,12 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
   if (isNoWrapAddRec(Ptr, AR, PSE, Lp))
     return Stride;
 
-  // An inbounds getelementptr that is a AddRec with a unit stride
+  // An nusw getelementptr that is a AddRec with a unit stride
   // cannot wrap per definition.  If it did, the result would be poison
   // and any memory access dependent on it would be immediate UB
   // when executed.
   if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
-      GEP && GEP->isInBounds() && (Stride == 1 || Stride == -1))
+      GEP && GEP->hasNoUnsignedSignedWrap() && (Stride == 1 || Stride == -1))
     return Stride;
 
   // If the null pointer is undefined, then a access sequence which would
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
index 1585c7b5628063..525995156481c8 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
@@ -48,6 +48,51 @@ exit:                                          ; preds = %loop
   ret void
 }
 
+; A forwarding in the presence of symbolic strides,
+; with nusw instead of inbounds on the GEPs.
+define void @single_stride_nusw(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
+; CHECK-LABEL: 'single_stride_nusw'
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Backward loop carried data dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        Backward:
+; CHECK-NEXT:            %load = load i32, ptr %gep.A, align 4 ->
+; CHECK-NEXT:            store i32 %add, ptr %gep.A.next, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-NEXT:      Equal predicate: %stride == 1
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+; CHECK-NEXT:      [PSE] %gep.A = getelementptr nusw i32, ptr %A, i64 %mul:
+; CHECK-NEXT:        {%A,+,(4 * %stride)}<%loop>
+; CHECK-NEXT:        --> {%A,+,4}<%loop>
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %mul = mul i64 %iv, %stride
+  %gep.A = getelementptr nusw i32, ptr %A, i64 %mul
+  %load = load i32, ptr %gep.A, align 4
+  %gep.B = getelementptr nusw i32, ptr %B, i64 %iv
+  %load_1 = load i32, ptr %gep.B, align 4
+  %add = add i32 %load_1, %load
+  %iv.next = add nuw nsw i64 %iv, 1
+  %gep.A.next = getelementptr nusw i32, ptr %A, i64 %iv.next
+  store i32 %add, ptr %gep.A.next, align 4
+  %exitcond = icmp eq i64 %iv.next, %N
+  br i1 %exitcond, label %exit, label %loop
+
+exit:                                          ; preds = %loop
+  ret void
+}
+
 ; Similar to @single_stride, but with struct types.
 define void @single_stride_struct(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
 ; CHECK-LABEL: 'single_stride_struct'



More information about the llvm-commits mailing list