[llvm] 044b528 - [LAA] Perform checks for no-wrap separately from getPtrStride. (#126971)

via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 14 11:06:42 PST 2025


Author: Florian Hahn
Date: 2025-02-14T20:06:37+01:00
New Revision: 044b52832a779ac49fec9472e9e374c1b4609450

URL: https://github.com/llvm/llvm-project/commit/044b52832a779ac49fec9472e9e374c1b4609450
DIFF: https://github.com/llvm/llvm-project/commit/044b52832a779ac49fec9472e9e374c1b4609450.diff

LOG: [LAA] Perform checks for no-wrap separately from getPtrStride. (#126971)

Reorganize the code in isNoWrap to perform the no-wrap checks without
relying on getPtrStride directly. getPtrStride now uses isNoWrap.

The new structure allows deriving no-wrap in more cases in LAA, because
there are some cases where getPtrStride bails out early because it
cannot return a constant stride, but we can still prove no-wrap for the
pointer.

An example are AddRecs with non-ConstantInt strides with inbound GEPs,
in the improved test cases.

This enables vectorization with runtime checks in a few more cases.

PR: https://github.com/llvm/llvm-project/pull/126971

Added: 
    

Modified: 
    llvm/lib/Analysis/LoopAccessAnalysis.cpp
    llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index cd1294b3c5276..0ee4b837b0f2a 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -855,16 +855,61 @@ getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy,
   return Stride;
 }
 
+static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
+                           PredicatedScalarEvolution &PSE, const Loop *L);
+
 /// Check whether a pointer address cannot wrap.
 static bool isNoWrap(PredicatedScalarEvolution &PSE,
                      const DenseMap<Value *, const SCEV *> &Strides, Value *Ptr,
-                     Type *AccessTy, Loop *L, bool Assume) {
-  const SCEV *PtrScev = PSE.getSCEV(Ptr);
+                     Type *AccessTy, const Loop *L, bool Assume,
+                     std::optional<int64_t> Stride = std::nullopt) {
+  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
     return true;
 
-  return getPtrStride(PSE, AccessTy, Ptr, L, Strides, Assume).has_value() ||
-         PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+  if (!AR) {
+    if (!Assume)
+      return false;
+    AR = PSE.getAsAddRec(Ptr);
+  }
+
+  // The address calculation must not wrap. Otherwise, a dependence could be
+  // inverted.
+  if (isNoWrapAddRec(Ptr, AR, PSE, L))
+    return true;
+
+  // An nusw getelementptr that is an AddRec cannot wrap. If it would wrap,
+  // the distance between the previously accessed location and the wrapped
+  // location will be larger than half the pointer index type space. In that
+  // case, the GEP would be  poison and any memory access dependent on it would
+  // be immediate UB when executed.
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+      GEP && GEP->hasNoUnsignedSignedWrap())
+    return true;
+
+  if (!Stride)
+    Stride = getStrideFromAddRec(AR, L, AccessTy, Ptr, PSE);
+  if (Stride) {
+    // If the null pointer is undefined, then a access sequence which would
+    // otherwise access it can be assumed not to unsigned wrap.  Note that this
+    // assumes the object in memory is aligned to the natural alignment.
+    unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
+    if (!NullPointerIsDefined(L->getHeader()->getParent(), AddrSpace) &&
+        (Stride == 1 || Stride == -1))
+      return true;
+  }
+
+  if (Assume) {
+    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+    LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
+                      << "LAA:   Pointer: " << *Ptr << "\n"
+                      << "LAA:   SCEV: " << *AR << "\n"
+                      << "LAA:   Added an overflow assumption\n");
+    return true;
+  }
+
+  return PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
 }
 
 static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
@@ -1505,36 +1550,9 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
   if (!ShouldCheckWrap || !Stride)
     return Stride;
 
-  // The address calculation must not wrap. Otherwise, a dependence could be
-  // inverted.
-  if (isNoWrapAddRec(Ptr, AR, PSE, Lp))
-    return Stride;
-
-  // An nusw getelementptr that is an AddRec cannot wrap. If it would wrap,
-  // the distance between the previously accessed location and the wrapped
-  // location will be larger than half the pointer index type space. In that
-  // case, the GEP would be  poison and any memory access dependent on it would
-  // be immediate UB when executed.
-  if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
-      GEP && GEP->hasNoUnsignedSignedWrap())
-    return Stride;
-
-  // If the null pointer is undefined, then a access sequence which would
-  // otherwise access it can be assumed not to unsigned wrap.  Note that this
-  // assumes the object in memory is aligned to the natural alignment.
-  unsigned AddrSpace = Ty->getPointerAddressSpace();
-  if (!NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace) &&
-      (Stride == 1 || Stride == -1))
+  if (isNoWrap(PSE, StridesMap, Ptr, AccessTy, Lp, Assume, Stride))
     return Stride;
 
-  if (Assume) {
-    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
-    LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
-                      << "LAA:   Pointer: " << *Ptr << "\n"
-                      << "LAA:   SCEV: " << *AR << "\n"
-                      << "LAA:   Added an overflow assumption\n");
-    return Stride;
-  }
   LLVM_DEBUG(
       dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
              << *Ptr << " SCEV: " << *AR << "\n");

diff  --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
index e42392df3e93e..26c571b9cb63a 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
@@ -65,13 +65,38 @@ exit:
 define void @dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_step5(ptr %a, ptr %b, i64 %offset, i64 %n) {
 ; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_step5'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: cannot check memory dependencies at runtime
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group ([[GRP4:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT:        Against group ([[GRP5:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
+; CHECK-NEXT:      Check 1:
+; CHECK-NEXT:        Comparing group ([[GRP4]]):
+; CHECK-NEXT:          %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT:        Against group ([[GRP6:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.b = getelementptr i8, ptr %b, i64 %iv2
+; CHECK-NEXT:      Check 2:
+; CHECK-NEXT:        Comparing group ([[GRP5]]):
+; CHECK-NEXT:          %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
+; CHECK-NEXT:        Against group ([[GRP6]]):
+; CHECK-NEXT:          %gep.b = getelementptr i8, ptr %b, i64 %iv2
 ; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group [[GRP4]]:
+; CHECK-NEXT:          (Low: %a High: ((4 * %n) + %a))
+; CHECK-NEXT:            Member: {%a,+,4}<nuw><%loop>
+; CHECK-NEXT:        Group [[GRP5]]:
+; CHECK-NEXT:          (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
+; CHECK-NEXT:            Member: {((4 * %offset) + %a),+,4}<%loop>
+; CHECK-NEXT:        Group [[GRP6]]:
+; CHECK-NEXT:          (Low: %b High: (-1 + (5 * %n) + %b))
+; CHECK-NEXT:            Member: {%b,+,5}<%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
+; CHECK-NEXT:      {%b,+,5}<%loop> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Expressions re-written:
 ;
@@ -102,10 +127,34 @@ exit:
 define void @dependency_check_and_runtime_checks_needed_gepb_is_inbounds_iv2_step_not_constant(ptr %a, ptr %b, i64 %offset, i64 %n, i64 %s) {
 ; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_gepb_is_inbounds_iv2_step_not_constant'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: cannot check memory dependencies at runtime
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group ([[GRP7:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT:        Against group ([[GRP8:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
+; CHECK-NEXT:      Check 1:
+; CHECK-NEXT:        Comparing group ([[GRP7]]):
+; CHECK-NEXT:          %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT:        Against group ([[GRP9:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
+; CHECK-NEXT:      Check 2:
+; CHECK-NEXT:        Comparing group ([[GRP8]]):
+; CHECK-NEXT:          %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
+; CHECK-NEXT:        Against group ([[GRP9]]):
+; CHECK-NEXT:          %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
 ; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group [[GRP7]]:
+; CHECK-NEXT:          (Low: %a High: ((4 * %n) + %a))
+; CHECK-NEXT:            Member: {%a,+,4}<nuw><%loop>
+; CHECK-NEXT:        Group [[GRP8]]:
+; CHECK-NEXT:          (Low: %b High: (3 + %n + %b))
+; CHECK-NEXT:            Member: {%b,+,1}<%loop>
+; CHECK-NEXT:        Group [[GRP9]]:
+; CHECK-NEXT:          (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
+; CHECK-NEXT:            Member: {((4 * %offset) + %a),+,4}<%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -144,10 +193,34 @@ exit:
 define void @dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_step_not_constant(ptr %a, ptr %b, i64 %offset, i64 %n, i64 %s) {
 ; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_step_not_constant'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: cannot check memory dependencies at runtime
+; CHECK-NEXT:      Memory dependences are safe with run-time checks
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Check 0:
+; CHECK-NEXT:        Comparing group ([[GRP10:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT:        Against group ([[GRP11:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
+; CHECK-NEXT:      Check 1:
+; CHECK-NEXT:        Comparing group ([[GRP10]]):
+; CHECK-NEXT:          %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT:        Against group ([[GRP12:0x[0-9a-f]+]]):
+; CHECK-NEXT:          %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
+; CHECK-NEXT:      Check 2:
+; CHECK-NEXT:        Comparing group ([[GRP11]]):
+; CHECK-NEXT:          %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
+; CHECK-NEXT:        Against group ([[GRP12]]):
+; CHECK-NEXT:          %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
 ; CHECK-NEXT:      Grouped accesses:
+; CHECK-NEXT:        Group [[GRP10]]:
+; CHECK-NEXT:          (Low: %a High: ((4 * %n) + %a))
+; CHECK-NEXT:            Member: {%a,+,4}<nuw><%loop>
+; CHECK-NEXT:        Group [[GRP11]]:
+; CHECK-NEXT:          (Low: %b High: (3 + %n + %b))
+; CHECK-NEXT:            Member: {%b,+,1}<%loop>
+; CHECK-NEXT:        Group [[GRP12]]:
+; CHECK-NEXT:          (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
+; CHECK-NEXT:            Member: {((4 * %offset) + %a),+,4}<%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -189,28 +262,28 @@ define void @dependency_check_and_runtime_checks_needed_gepb_may_wrap(ptr %a, pt
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Check 0:
-; CHECK-NEXT:        Comparing group ([[GRP4:0x[0-9a-f]+]]):
+; CHECK-NEXT:        Comparing group ([[GRP13:0x[0-9a-f]+]]):
 ; CHECK-NEXT:          %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
-; CHECK-NEXT:        Against group ([[GRP5:0x[0-9a-f]+]]):
+; CHECK-NEXT:        Against group ([[GRP14:0x[0-9a-f]+]]):
 ; CHECK-NEXT:          %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
 ; CHECK-NEXT:      Check 1:
-; CHECK-NEXT:        Comparing group ([[GRP4]]):
+; CHECK-NEXT:        Comparing group ([[GRP13]]):
 ; CHECK-NEXT:          %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
-; CHECK-NEXT:        Against group ([[GRP6:0x[0-9a-f]+]]):
+; CHECK-NEXT:        Against group ([[GRP15:0x[0-9a-f]+]]):
 ; CHECK-NEXT:          %gep.b = getelementptr float, ptr %b, i64 %iv2
 ; CHECK-NEXT:      Check 2:
-; CHECK-NEXT:        Comparing group ([[GRP5]]):
+; CHECK-NEXT:        Comparing group ([[GRP14]]):
 ; CHECK-NEXT:          %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
-; CHECK-NEXT:        Against group ([[GRP6]]):
+; CHECK-NEXT:        Against group ([[GRP15]]):
 ; CHECK-NEXT:          %gep.b = getelementptr float, ptr %b, i64 %iv2
 ; CHECK-NEXT:      Grouped accesses:
-; CHECK-NEXT:        Group [[GRP4]]:
+; CHECK-NEXT:        Group [[GRP13]]:
 ; CHECK-NEXT:          (Low: %a High: ((4 * %n) + %a))
 ; CHECK-NEXT:            Member: {%a,+,4}<nuw><%loop>
-; CHECK-NEXT:        Group [[GRP5]]:
+; CHECK-NEXT:        Group [[GRP14]]:
 ; CHECK-NEXT:          (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
 ; CHECK-NEXT:            Member: {((4 * %offset) + %a),+,4}<%loop>
-; CHECK-NEXT:        Group [[GRP6]]:
+; CHECK-NEXT:        Group [[GRP15]]:
 ; CHECK-NEXT:          (Low: %b High: (-4 + (8 * %n) + %b))
 ; CHECK-NEXT:            Member: {%b,+,8}<%loop>
 ; CHECK-EMPTY:


        


More information about the llvm-commits mailing list