[llvm] [LAA] Perform checks for no-wrap separately from getPtrStride. (PR #126971)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 12 12:58:24 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-analysis
Author: Florian Hahn (fhahn)
Reorganize the code in isNoWrap to perform the no-wrap checks without relying on getPtrStride directly. getPtrStride now uses isNoWrap.
The new structure lets LAA derive no-wrap in more cases: getPtrStride sometimes bails out early because it cannot return a constant stride, even though no-wrap can still be proven for the pointer.
An example is an AddRec with a non-ConstantInt stride from an inbounds GEP, as in the improved test cases.
This enables vectorization with runtime checks in a few more cases.
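For illustration, here is a minimal loop of the kind the improved tests cover (a hypothetical reduction of the `..._iv2_step_not_constant` tests, not code taken from the patch): the step `%s` is not a `ConstantInt`, so `getPtrStride` cannot return a constant stride, but the `inbounds` GEP still lets the restructured `isNoWrap` conclude the address computation cannot wrap, matching the updated CHECK lines in the diff below.

```llvm
; Hypothetical sketch: %iv2 advances by a loop-invariant but non-constant
; step %s, so the access has no constant stride. The inbounds GEP still
; guarantees the address computation cannot wrap (a wrapping GEP would be
; poison), which is enough to permit runtime checks.
define void @variable_stride(ptr %b, i64 %s, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %iv2 = phi i64 [ 0, %entry ], [ %iv2.next, %loop ]
  %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
  store i8 0, ptr %gep.b
  %iv2.next = add i64 %iv2, %s
  %iv.next = add nuw nsw i64 %iv, 1
  %ec = icmp eq i64 %iv.next, %n
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}
```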
---
Full diff: https://github.com/llvm/llvm-project/pull/126971.diff
2 Files Affected:
- (modified) llvm/lib/Analysis/LoopAccessAnalysis.cpp (+93-68)
- (modified) llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll (+85-12)
``````````diff
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 3202ba81be78e..84401d1a6d751 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -813,16 +813,102 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
return AR->isAffine();
}
+/// Try to compute the stride for \p AR. Used by getPtrStride.
+static std::optional<int64_t>
+getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy,
+ Value *Ptr, PredicatedScalarEvolution &PSE) {
+ // The access function must stride over the innermost loop.
+ if (Lp != AR->getLoop()) {
+ LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
+ << *Ptr << " SCEV: " << *AR << "\n");
+ return std::nullopt;
+ }
+
+ // Check the step is constant.
+ const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
+
+ // Calculate the pointer stride and check if it is constant.
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C) {
+ LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
+ << " SCEV: " << *AR << "\n");
+ return std::nullopt;
+ }
+
+ const auto &DL = Lp->getHeader()->getDataLayout();
+ TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
+ int64_t Size = AllocSize.getFixedValue();
+ const APInt &APStepVal = C->getAPInt();
+
+ // Huge step value - give up.
+ if (APStepVal.getBitWidth() > 64)
+ return std::nullopt;
+
+ int64_t StepVal = APStepVal.getSExtValue();
+
+ // Strided access.
+ int64_t Stride = StepVal / Size;
+ int64_t Rem = StepVal % Size;
+ if (Rem)
+ return std::nullopt;
+
+ return Stride;
+}
+
+static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
+ PredicatedScalarEvolution &PSE, const Loop *L);
+
/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
const DenseMap<Value *, const SCEV *> &Strides, Value *Ptr,
- Type *AccessTy, Loop *L, bool Assume) {
- const SCEV *PtrScev = PSE.getSCEV(Ptr);
+ Type *AccessTy, const Loop *L, bool Assume,
+ std::optional<int64_t> Stride = std::nullopt) {
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;
- return getPtrStride(PSE, AccessTy, Ptr, L, Strides, Assume).has_value() ||
- PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+ if (Assume && !AR)
+ AR = PSE.getAsAddRec(Ptr);
+ if (!AR)
+ return false;
+
+ // The address calculation must not wrap. Otherwise, a dependence could be
+ // inverted.
+ if (isNoWrapAddRec(Ptr, AR, PSE, L))
+ return true;
+
+ // An nusw getelementptr that is an AddRec cannot wrap. If it would wrap,
+ // the distance between the previously accessed location and the wrapped
+ // location will be larger than half the pointer index type space. In that
+ // case, the GEP would be poison and any memory access dependent on it would
+ // be immediate UB when executed.
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ GEP && GEP->hasNoUnsignedSignedWrap())
+ return true;
+
+ // If the null pointer is undefined, then a access sequence which would
+ // otherwise access it can be assumed not to unsigned wrap. Note that this
+ // assumes the object in memory is aligned to the natural alignment.
+ if (!Stride)
+ Stride = getStrideFromAddRec(AR, L, AccessTy, Ptr, PSE);
+ if (Stride) {
+ unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
+ if (!NullPointerIsDefined(L->getHeader()->getParent(), AddrSpace) &&
+ (*Stride == 1 || *Stride == -1))
+ return true;
+ }
+
+ if (Assume) {
+ PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+ LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
+ << "LAA: Pointer: " << *Ptr << "\n"
+ << "LAA: SCEV: " << *AR << "\n"
+ << "LAA: Added an overflow assumption\n");
+ return true;
+ }
+
+ return PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
}
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
@@ -1458,74 +1544,13 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
return std::nullopt;
}
- // The access function must stride over the innermost loop.
- if (Lp != AR->getLoop()) {
- LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
- << *Ptr << " SCEV: " << *AR << "\n");
- return std::nullopt;
- }
-
- // Check the step is constant.
- const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
-
- // Calculate the pointer stride and check if it is constant.
- const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
- if (!C) {
- LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
- << " SCEV: " << *AR << "\n");
- return std::nullopt;
- }
-
- const auto &DL = Lp->getHeader()->getDataLayout();
- TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
- int64_t Size = AllocSize.getFixedValue();
- const APInt &APStepVal = C->getAPInt();
-
- // Huge step value - give up.
- if (APStepVal.getBitWidth() > 64)
- return std::nullopt;
-
- int64_t StepVal = APStepVal.getSExtValue();
-
- // Strided access.
- int64_t Stride = StepVal / Size;
- int64_t Rem = StepVal % Size;
- if (Rem)
- return std::nullopt;
-
- if (!ShouldCheckWrap)
- return Stride;
-
- // The address calculation must not wrap. Otherwise, a dependence could be
- // inverted.
- if (isNoWrapAddRec(Ptr, AR, PSE, Lp))
- return Stride;
-
- // An nusw getelementptr that is an AddRec cannot wrap. If it would wrap,
- // the distance between the previously accessed location and the wrapped
- // location will be larger than half the pointer index type space. In that
- // case, the GEP would be poison and any memory access dependent on it would
- // be immediate UB when executed.
- if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
- GEP && GEP->hasNoUnsignedSignedWrap())
+ auto Stride = getStrideFromAddRec(AR, Lp, AccessTy, Ptr, PSE);
+ if (!ShouldCheckWrap || !Stride)
return Stride;
- // If the null pointer is undefined, then a access sequence which would
- // otherwise access it can be assumed not to unsigned wrap. Note that this
- // assumes the object in memory is aligned to the natural alignment.
- unsigned AddrSpace = Ty->getPointerAddressSpace();
- if (!NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace) &&
- (Stride == 1 || Stride == -1))
+ if (isNoWrap(PSE, StridesMap, Ptr, AccessTy, Lp, Assume, Stride))
return Stride;
- if (Assume) {
- PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
- LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
- << "LAA: Pointer: " << *Ptr << "\n"
- << "LAA: SCEV: " << *AR << "\n"
- << "LAA: Added an overflow assumption\n");
- return Stride;
- }
LLVM_DEBUG(
dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
<< *Ptr << " SCEV: " << *AR << "\n");
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
index e42392df3e93e..26c571b9cb63a 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
@@ -65,13 +65,38 @@ exit:
define void @dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_step5(ptr %a, ptr %b, i64 %offset, i64 %n) {
; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_step5'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: cannot check memory dependencies at runtime
+; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP4:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT: Against group ([[GRP5:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
+; CHECK-NEXT: Check 1:
+; CHECK-NEXT: Comparing group ([[GRP4]]):
+; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT: Against group ([[GRP6:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.b = getelementptr i8, ptr %b, i64 %iv2
+; CHECK-NEXT: Check 2:
+; CHECK-NEXT: Comparing group ([[GRP5]]):
+; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
+; CHECK-NEXT: Against group ([[GRP6]]):
+; CHECK-NEXT: %gep.b = getelementptr i8, ptr %b, i64 %iv2
; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP4]]:
+; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a))
+; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop>
+; CHECK-NEXT: Group [[GRP5]]:
+; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
+; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
+; CHECK-NEXT: Group [[GRP6]]:
+; CHECK-NEXT: (Low: %b High: (-1 + (5 * %n) + %b))
+; CHECK-NEXT: Member: {%b,+,5}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
+; CHECK-NEXT: {%b,+,5}<%loop> Added Flags: <nusw>
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
@@ -102,10 +127,34 @@ exit:
define void @dependency_check_and_runtime_checks_needed_gepb_is_inbounds_iv2_step_not_constant(ptr %a, ptr %b, i64 %offset, i64 %n, i64 %s) {
; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_gepb_is_inbounds_iv2_step_not_constant'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: cannot check memory dependencies at runtime
+; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP7:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT: Against group ([[GRP8:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
+; CHECK-NEXT: Check 1:
+; CHECK-NEXT: Comparing group ([[GRP7]]):
+; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT: Against group ([[GRP9:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
+; CHECK-NEXT: Check 2:
+; CHECK-NEXT: Comparing group ([[GRP8]]):
+; CHECK-NEXT: %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
+; CHECK-NEXT: Against group ([[GRP9]]):
+; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP7]]:
+; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a))
+; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop>
+; CHECK-NEXT: Group [[GRP8]]:
+; CHECK-NEXT: (Low: %b High: (3 + %n + %b))
+; CHECK-NEXT: Member: {%b,+,1}<%loop>
+; CHECK-NEXT: Group [[GRP9]]:
+; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
+; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
@@ -144,10 +193,34 @@ exit:
define void @dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_step_not_constant(ptr %a, ptr %b, i64 %offset, i64 %n, i64 %s) {
; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_step_not_constant'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: cannot check memory dependencies at runtime
+; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP10:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT: Against group ([[GRP11:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
+; CHECK-NEXT: Check 1:
+; CHECK-NEXT: Comparing group ([[GRP10]]):
+; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
+; CHECK-NEXT: Against group ([[GRP12:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
+; CHECK-NEXT: Check 2:
+; CHECK-NEXT: Comparing group ([[GRP11]]):
+; CHECK-NEXT: %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv2
+; CHECK-NEXT: Against group ([[GRP12]]):
+; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP10]]:
+; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a))
+; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop>
+; CHECK-NEXT: Group [[GRP11]]:
+; CHECK-NEXT: (Low: %b High: (3 + %n + %b))
+; CHECK-NEXT: Member: {%b,+,1}<%loop>
+; CHECK-NEXT: Group [[GRP12]]:
+; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
+; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
@@ -189,28 +262,28 @@ define void @dependency_check_and_runtime_checks_needed_gepb_may_wrap(ptr %a, pt
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
-; CHECK-NEXT: Comparing group ([[GRP4:0x[0-9a-f]+]]):
+; CHECK-NEXT: Comparing group ([[GRP13:0x[0-9a-f]+]]):
; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
-; CHECK-NEXT: Against group ([[GRP5:0x[0-9a-f]+]]):
+; CHECK-NEXT: Against group ([[GRP14:0x[0-9a-f]+]]):
; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
; CHECK-NEXT: Check 1:
-; CHECK-NEXT: Comparing group ([[GRP4]]):
+; CHECK-NEXT: Comparing group ([[GRP13]]):
; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
-; CHECK-NEXT: Against group ([[GRP6:0x[0-9a-f]+]]):
+; CHECK-NEXT: Against group ([[GRP15:0x[0-9a-f]+]]):
; CHECK-NEXT: %gep.b = getelementptr float, ptr %b, i64 %iv2
; CHECK-NEXT: Check 2:
-; CHECK-NEXT: Comparing group ([[GRP5]]):
+; CHECK-NEXT: Comparing group ([[GRP14]]):
; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
-; CHECK-NEXT: Against group ([[GRP6]]):
+; CHECK-NEXT: Against group ([[GRP15]]):
; CHECK-NEXT: %gep.b = getelementptr float, ptr %b, i64 %iv2
; CHECK-NEXT: Grouped accesses:
-; CHECK-NEXT: Group [[GRP4]]:
+; CHECK-NEXT: Group [[GRP13]]:
; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a))
; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop>
-; CHECK-NEXT: Group [[GRP5]]:
+; CHECK-NEXT: Group [[GRP14]]:
; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
-; CHECK-NEXT: Group [[GRP6]]:
+; CHECK-NEXT: Group [[GRP15]]:
; CHECK-NEXT: (Low: %b High: (-4 + (8 * %n) + %b))
; CHECK-NEXT: Member: {%b,+,8}<%loop>
; CHECK-EMPTY:
``````````
https://github.com/llvm/llvm-project/pull/126971
More information about the llvm-commits mailing list