[llvm] [LAA] Perform checks for no-wrap separately from getPtrStride. (PR #126971)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 13 13:24:37 PST 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/126971
>From 1a61c4f9772acefc743cc29cde93e5144d877124 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 12 Feb 2025 21:56:08 +0100
Subject: [PATCH 1/2] [LAA] Perform checks for no-wrap separately from
getPtrStride.
Reorganize the code in isNoWrap to perform the no-wrap checks without
relying on getPtrStride directly. getPtrStride now uses isNoWrap.
The new structure allows deriving no-wrap in more cases in LAA, because
there are some cases where getPtrStride bails out early because it cannot
return a constant stride, but we can still prove no-wrap for the pointer.
One example is AddRecs with non-ConstantInt strides from inbounds GEPs,
as shown in the improved test cases.
This enables vectorization with runtime checks in a few more cases.
---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 81 ++++++++++------
...untime-checks-after-dependence-analysis.ll | 97 ++++++++++++++++---
2 files changed, 134 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index cd1294b3c5276..0ff77379f9f7b 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -855,16 +855,60 @@ getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy,
return Stride;
}
+static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
+ PredicatedScalarEvolution &PSE, const Loop *L);
+
/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
const DenseMap<Value *, const SCEV *> &Strides, Value *Ptr,
- Type *AccessTy, Loop *L, bool Assume) {
- const SCEV *PtrScev = PSE.getSCEV(Ptr);
+ Type *AccessTy, const Loop *L, bool Assume,
+ std::optional<int64_t> Stride = std::nullopt) {
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;
- return getPtrStride(PSE, AccessTy, Ptr, L, Strides, Assume).has_value() ||
- PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+ if (Assume && !AR)
+ AR = PSE.getAsAddRec(Ptr);
+ if (!AR)
+ return false;
+
+ // The address calculation must not wrap. Otherwise, a dependence could be
+ // inverted.
+ if (isNoWrapAddRec(Ptr, AR, PSE, L))
+ return true;
+
+ // An nusw getelementptr that is an AddRec cannot wrap. If it would wrap,
+ // the distance between the previously accessed location and the wrapped
+ // location will be larger than half the pointer index type space. In that
+ // case, the GEP would be poison and any memory access dependent on it would
+ // be immediate UB when executed.
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ GEP && GEP->hasNoUnsignedSignedWrap())
+ return true;
+
+ // If the null pointer is undefined, then a access sequence which would
+ // otherwise access it can be assumed not to unsigned wrap. Note that this
+ // assumes the object in memory is aligned to the natural alignment.
+ if (!Stride)
+ Stride = getStrideFromAddRec(AR, L, AccessTy, Ptr, PSE);
+ if (Stride) {
+ unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
+ if (!NullPointerIsDefined(L->getHeader()->getParent(), AddrSpace) &&
+ (*Stride == 1 || *Stride == -1))
+ return true;
+ }
+
+ if (Assume) {
+ PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+ LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
+ << "LAA: Pointer: " << *Ptr << "\n"
+ << "LAA: SCEV: " << *AR << "\n"
+ << "LAA: Added an overflow assumption\n");
+ return true;
+ }
+
+ return PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
}
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
@@ -1505,36 +1549,9 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
if (!ShouldCheckWrap || !Stride)
return Stride;
- // The address calculation must not wrap. Otherwise, a dependence could be
- // inverted.
- if (isNoWrapAddRec(Ptr, AR, PSE, Lp))
+ if (isNoWrap(PSE, StridesMap, Ptr, AccessTy, Lp, Assume, Stride))
return Stride;
- // An nusw getelementptr that is an AddRec cannot wrap. If it would wrap,
- // the distance between the previously accessed location and the wrapped
- // location will be larger than half the pointer index type space. In that
- // case, the GEP would be poison and any memory access dependent on it would
- // be immediate UB when executed.
- if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
- GEP && GEP->hasNoUnsignedSignedWrap())
- return Stride;
-
- // If the null pointer is undefined, then a access sequence which would
- // otherwise access it can be assumed not to unsigned wrap. Note that this
- // assumes the object in memory is aligned to the natural alignment.
- unsigned AddrSpace = Ty->getPointerAddressSpace();
- if (!NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace) &&
- (Stride == 1 || Stride == -1))
- return Stride;
-
- if (Assume) {
- PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
- LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
- << "LAA: Pointer: " << *Ptr << "\n"
- << "LAA: SCEV: " << *AR << "\n"
- << "LAA: Added an overflow assumption\n");
- return Stride;
- }
LLVM_DEBUG(
dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
<< *Ptr << " SCEV: " << *AR << "\n");
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
index e42392df3e93e..26c571b9cb63a 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
@@ -65,13 +65,38 @@ exit:
define void @dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_step5(ptr %a, ptr %b, i64 %offset, i64 %n) {
; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_step5'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: cannot check memory dependencies at runtime
+; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP4:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT: Against group ([[GRP5:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
+; CHECK-NEXT: Check 1:
+; CHECK-NEXT: Comparing group ([[GRP4]]):
+; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT: Against group ([[GRP6:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.b = getelementptr i8, ptr %b, i64 %iv2
+; CHECK-NEXT: Check 2:
+; CHECK-NEXT: Comparing group ([[GRP5]]):
+; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
+; CHECK-NEXT: Against group ([[GRP6]]):
+; CHECK-NEXT: %gep.b = getelementptr i8, ptr %b, i64 %iv2
; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP4]]:
+; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a))
+; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop>
+; CHECK-NEXT: Group [[GRP5]]:
+; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
+; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
+; CHECK-NEXT: Group [[GRP6]]:
+; CHECK-NEXT: (Low: %b High: (-1 + (5 * %n) + %b))
+; CHECK-NEXT: Member: {%b,+,5}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
+; CHECK-NEXT: {%b,+,5}<%loop> Added Flags: <nusw>
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
@@ -102,10 +127,34 @@ exit:
define void @dependency_check_and_runtime_checks_needed_gepb_is_inbounds_iv2_step_not_constant(ptr %a, ptr %b, i64 %offset, i64 %n, i64 %s) {
; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_gepb_is_inbounds_iv2_step_not_constant'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: cannot check memory dependencies at runtime
+; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP7:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT: Against group ([[GRP8:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
+; CHECK-NEXT: Check 1:
+; CHECK-NEXT: Comparing group ([[GRP7]]):
+; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT: Against group ([[GRP9:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
+; CHECK-NEXT: Check 2:
+; CHECK-NEXT: Comparing group ([[GRP8]]):
+; CHECK-NEXT: %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
+; CHECK-NEXT: Against group ([[GRP9]]):
+; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP7]]:
+; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a))
+; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop>
+; CHECK-NEXT: Group [[GRP8]]:
+; CHECK-NEXT: (Low: %b High: (3 + %n + %b))
+; CHECK-NEXT: Member: {%b,+,1}<%loop>
+; CHECK-NEXT: Group [[GRP9]]:
+; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
+; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
@@ -144,10 +193,34 @@ exit:
define void @dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_step_not_constant(ptr %a, ptr %b, i64 %offset, i64 %n, i64 %s) {
; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_step_not_constant'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: cannot check memory dependencies at runtime
+; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP10:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT: Against group ([[GRP11:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
+; CHECK-NEXT: Check 1:
+; CHECK-NEXT: Comparing group ([[GRP10]]):
+; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT: Against group ([[GRP12:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
+; CHECK-NEXT: Check 2:
+; CHECK-NEXT: Comparing group ([[GRP11]]):
+; CHECK-NEXT: %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
+; CHECK-NEXT: Against group ([[GRP12]]):
+; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP10]]:
+; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a))
+; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop>
+; CHECK-NEXT: Group [[GRP11]]:
+; CHECK-NEXT: (Low: %b High: (3 + %n + %b))
+; CHECK-NEXT: Member: {%b,+,1}<%loop>
+; CHECK-NEXT: Group [[GRP12]]:
+; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
+; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
@@ -189,28 +262,28 @@ define void @dependency_check_and_runtime_checks_needed_gepb_may_wrap(ptr %a, pt
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
-; CHECK-NEXT: Comparing group ([[GRP4:0x[0-9a-f]+]]):
+; CHECK-NEXT: Comparing group ([[GRP13:0x[0-9a-f]+]]):
; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
-; CHECK-NEXT: Against group ([[GRP5:0x[0-9a-f]+]]):
+; CHECK-NEXT: Against group ([[GRP14:0x[0-9a-f]+]]):
; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
; CHECK-NEXT: Check 1:
-; CHECK-NEXT: Comparing group ([[GRP4]]):
+; CHECK-NEXT: Comparing group ([[GRP13]]):
; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
-; CHECK-NEXT: Against group ([[GRP6:0x[0-9a-f]+]]):
+; CHECK-NEXT: Against group ([[GRP15:0x[0-9a-f]+]]):
; CHECK-NEXT: %gep.b = getelementptr float, ptr %b, i64 %iv2
; CHECK-NEXT: Check 2:
-; CHECK-NEXT: Comparing group ([[GRP5]]):
+; CHECK-NEXT: Comparing group ([[GRP14]]):
; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
-; CHECK-NEXT: Against group ([[GRP6]]):
+; CHECK-NEXT: Against group ([[GRP15]]):
; CHECK-NEXT: %gep.b = getelementptr float, ptr %b, i64 %iv2
; CHECK-NEXT: Grouped accesses:
-; CHECK-NEXT: Group [[GRP4]]:
+; CHECK-NEXT: Group [[GRP13]]:
; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a))
; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop>
-; CHECK-NEXT: Group [[GRP5]]:
+; CHECK-NEXT: Group [[GRP14]]:
; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
-; CHECK-NEXT: Group [[GRP6]]:
+; CHECK-NEXT: Group [[GRP15]]:
; CHECK-NEXT: (Low: %b High: (-4 + (8 * %n) + %b))
; CHECK-NEXT: Member: {%b,+,8}<%loop>
; CHECK-EMPTY:
>From 4e400885f54eb2138dae7662425f6bd348581c25 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 13 Feb 2025 22:24:08 +0100
Subject: [PATCH 2/2] !fixup address latest comments, thanks!
---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 0ff77379f9f7b..0ee4b837b0f2a 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -868,10 +868,11 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE,
return true;
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
- if (Assume && !AR)
+ if (!AR) {
+ if (!Assume)
+ return false;
AR = PSE.getAsAddRec(Ptr);
- if (!AR)
- return false;
+ }
// The address calculation must not wrap. Otherwise, a dependence could be
// inverted.
@@ -887,15 +888,15 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE,
GEP && GEP->hasNoUnsignedSignedWrap())
return true;
- // If the null pointer is undefined, then a access sequence which would
- // otherwise access it can be assumed not to unsigned wrap. Note that this
- // assumes the object in memory is aligned to the natural alignment.
if (!Stride)
Stride = getStrideFromAddRec(AR, L, AccessTy, Ptr, PSE);
if (Stride) {
+ // If the null pointer is undefined, then a access sequence which would
+ // otherwise access it can be assumed not to unsigned wrap. Note that this
+ // assumes the object in memory is aligned to the natural alignment.
unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
if (!NullPointerIsDefined(L->getHeader()->getParent(), AddrSpace) &&
- (*Stride == 1 || *Stride == -1))
+ (Stride == 1 || Stride == -1))
return true;
}
More information about the llvm-commits
mailing list