[llvm] d897ea3 - LAA: check nusw on GEP in place of inbounds (#112223)

via llvm-commits <llvm-commits@lists.llvm.org>
Tue Oct 22 01:58:58 PDT 2024


Author: Ramkumar Ramachandra
Date: 2024-10-22T09:58:54+01:00
New Revision: d897ea37dbac66d51794938af4f112e05fb61b05

URL: https://github.com/llvm/llvm-project/commit/d897ea37dbac66d51794938af4f112e05fb61b05
DIFF: https://github.com/llvm/llvm-project/commit/d897ea37dbac66d51794938af4f112e05fb61b05.diff

LOG: LAA: check nusw on GEP in place of inbounds (#112223)

With the introduction of the nusw flag in GEPNoWrapFlags, it should be
safe to weaken the check in LoopAccessAnalysis from inbounds to the nusw
flag on the GEP, since inbounds implies nusw.
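
For reference, here is a minimal standalone sketch of what the weakened
check relies on (not part of the patch; the helper names below are made
up), assuming the GEPNoWrapFlags API that ships with the nusw work:
inbounds is defined to include nusw, so every GEP the old isInBounds()
check accepted still passes hasNoUnsignedSignedWrap(), and GEPs carrying
only nusw are now accepted as well.

    // Sketch only -- not part of this commit.
    #include "llvm/IR/GEPNoWrapFlags.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // inBounds() carries the nusw bit, so the nusw query holds for
    // every inbounds GEP.
    static bool inboundsImpliesNUSW() {
      return GEPNoWrapFlags::inBounds().hasNoUnsignedSignedWrap(); // true
    }

    // The pattern LAA now uses: accept any GEP whose no-wrap flags
    // include nusw, whether set directly or implied by inbounds.
    static bool gepHasNUSW(const Value *Ptr) {
      const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
      return GEP && GEP->hasNoUnsignedSignedWrap();
    }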

Added: 
    

Modified: 
    llvm/lib/Analysis/LoopAccessAnalysis.cpp
    llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 0d2ee6fb674be6..6f00d5034fbd57 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1407,9 +1407,9 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
   // Look through the potentially overflowing instruction to try to prove
   // non-wrapping for the *specific* value of Ptr.
 
-  // The arithmetic implied by an inbounds GEP can't overflow.
+  // The arithmetic implied by an nusw GEP can't overflow.
   const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
-  if (!GEP || !GEP->isInBounds())
+  if (!GEP || !GEP->hasNoUnsignedSignedWrap())
     return false;
 
   // Make sure there is only one non-const index and analyze that.
@@ -1511,12 +1511,12 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
   if (isNoWrapAddRec(Ptr, AR, PSE, Lp))
     return Stride;
 
-  // An inbounds getelementptr that is a AddRec with a unit stride
+  // An nusw getelementptr that is a AddRec with a unit stride
   // cannot wrap per definition.  If it did, the result would be poison
   // and any memory access dependent on it would be immediate UB
   // when executed.
   if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
-      GEP && GEP->isInBounds() && (Stride == 1 || Stride == -1))
+      GEP && GEP->hasNoUnsignedSignedWrap() && (Stride == 1 || Stride == -1))
     return Stride;
 
   // If the null pointer is undefined, then a access sequence which would

diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
index 1585c7b5628063..525995156481c8 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
@@ -48,6 +48,51 @@ exit:                                          ; preds = %loop
   ret void
 }
 
+; A forwarding in the presence of symbolic strides,
+; with nusw instead of inbounds on the GEPs.
+define void @single_stride_nusw(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
+; CHECK-LABEL: 'single_stride_nusw'
+; CHECK-NEXT:    loop:
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Backward loop carried data dependence.
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:        Backward:
+; CHECK-NEXT:            %load = load i32, ptr %gep.A, align 4 ->
+; CHECK-NEXT:            store i32 %add, ptr %gep.A.next, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-NEXT:      Equal predicate: %stride == 1
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+; CHECK-NEXT:      [PSE] %gep.A = getelementptr nusw i32, ptr %A, i64 %mul:
+; CHECK-NEXT:        {%A,+,(4 * %stride)}<%loop>
+; CHECK-NEXT:        --> {%A,+,4}<%loop>
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %mul = mul i64 %iv, %stride
+  %gep.A = getelementptr nusw i32, ptr %A, i64 %mul
+  %load = load i32, ptr %gep.A, align 4
+  %gep.B = getelementptr nusw i32, ptr %B, i64 %iv
+  %load_1 = load i32, ptr %gep.B, align 4
+  %add = add i32 %load_1, %load
+  %iv.next = add nuw nsw i64 %iv, 1
+  %gep.A.next = getelementptr nusw i32, ptr %A, i64 %iv.next
+  store i32 %add, ptr %gep.A.next, align 4
+  %exitcond = icmp eq i64 %iv.next, %N
+  br i1 %exitcond, label %exit, label %loop
+
+exit:                                          ; preds = %loop
+  ret void
+}
+
 ; Similar to @single_stride, but with struct types.
 define void @single_stride_struct(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
 ; CHECK-LABEL: 'single_stride_struct'

