[llvm] [LAA] Be more careful when evaluating AddRecs at symbolic max BTC. (PR #128061)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 19 09:13:02 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/128061
From 49704a23c86c49dad8cd168952e3159c22ea8bf6 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 29 Aug 2024 12:21:20 +0100
Subject: [PATCH 1/7] [LAA] Be more careful when evaluating AddRecs at symbolic
max BTC.
Evaluating AR at the symbolic max BTC may wrap, creating an expression
that is less than the start of the AddRec (for example, consider
MaxBTC = -2).
If that's the case, set ScEnd to -(EltSize + 1). ScEnd will get
incremented by EltSize before returning, so this effectively
sets ScEnd to unsigned max. Note that LAA separately checks that
accesses cannot wrap, so unsigned max represents an upper bound.
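
As an editorial aside, a minimal arithmetic sketch of the wrap-around this
guards against (values invented, assuming a 32-bit index type and
EltSize = 4):

  #include <cstdint>
  uint32_t Start = 0x1000;                 // AR->getStart()
  uint32_t End = Start + 4u * 0xFFFFFFFEu; // MaxBTC = -2: wraps to 0x0FF8
  // End < Start, so [Start, End) is meaningless after wrapping. Instead:
  uint32_t ScEnd = -(4u + 1u);             // -(EltSize + 1) = 0xFFFFFFFB
  uint32_t Hi = ScEnd + 4u;                // + EltSize = 0xFFFFFFFF, unsigned max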
---
.../llvm/Analysis/LoopAccessAnalysis.h | 4 +-
llvm/lib/Analysis/Loads.cpp | 5 +-
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 44 ++++++---
...aluate-at-backedge-taken-count-wrapping.ll | 92 +++++++++++++++++++
...bolic-max-backedge-taken-count-may-wrap.ll | 6 +-
5 files changed, 133 insertions(+), 18 deletions(-)
create mode 100644 llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-backedge-taken-count-wrapping.ll
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 2a44ec8032236..191c8399fa49c 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -915,8 +915,8 @@ LLVM_ABI bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
LLVM_ABI std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
- const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
- ScalarEvolution *SE,
+ const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
+ const SCEV *SymbolicMaxBTC, ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
std::pair<const SCEV *, const SCEV *>> *PointerBounds);
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 71a75b496455a..249b3916f527b 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -319,11 +319,14 @@ bool llvm::isDereferenceableAndAlignedInLoop(
const SCEV *MaxBECount =
Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
: SE.getConstantMaxBackedgeTakenCount(L);
+ const SCEV *SymbolicMaxBECount =
+ Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
+ : SE.getConstantMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(MaxBECount))
return false;
const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
- L, PtrScev, LI->getType(), MaxBECount, &SE, nullptr);
+ L, PtrScev, LI->getType(), MaxBECount, SymbolicMaxBECount, &SE, nullptr);
if (isa<SCEVCouldNotCompute>(AccessStart) ||
isa<SCEVCouldNotCompute>(AccessEnd))
return false;
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 00cdb66d8b779..53d78ff7327ac 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -189,8 +189,8 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
}
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
- const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
- ScalarEvolution *SE,
+ const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
+ const SCEV *SymbolicMaxBTC, ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
@@ -206,11 +206,31 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
const SCEV *ScStart;
const SCEV *ScEnd;
+ auto &DL = Lp->getHeader()->getDataLayout();
+ Type *IdxTy = DL.getIndexType(PtrExpr->getType());
+ const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
if (SE->isLoopInvariant(PtrExpr, Lp)) {
ScStart = ScEnd = PtrExpr;
} else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
ScStart = AR->getStart();
- ScEnd = AR->evaluateAtIteration(MaxBECount, *SE);
+ if (!isa<SCEVCouldNotCompute>(BTC))
+ // Evaluating AR at an exact BTC is safe: LAA separately checks that
+ // accesses cannot wrap in the loop. If evaluating AR at BTC wraps, then
+ // the loop either triggers UB when executing a memory access with a
+ // poison pointer or the wrapping/poisoned pointer is not used.
+ ScEnd = AR->evaluateAtIteration(BTC, *SE);
+ else {
+ // Evaluating AR at MaxBTC may wrap, creating an expression that is less
+ // than the start of the AddRec (for example, consider
+ // MaxBTC = -2). If that's the case, set ScEnd to -(EltSize + 1). ScEnd
+ // will get incremented by EltSize before returning, so this effectively
+ // sets ScEnd to unsigned max. Note that LAA separately checks that
+ // accesses cannot wrap, so unsigned max represents an upper bound.
+ ScEnd = AR->evaluateAtIteration(SymbolicMaxBTC, *SE);
+ if (!SE->isKnownNonNegative(SE->getMinusSCEV(ScEnd, ScStart)))
+ ScEnd = SE->getNegativeSCEV(
+ SE->getAddExpr(EltSizeSCEV, SE->getOne(EltSizeSCEV->getType())));
+ }
const SCEV *Step = AR->getStepRecurrence(*SE);
// For expressions with negative step, the upper bound is ScStart and the
@@ -232,9 +252,6 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
// Add the size of the pointed element to ScEnd.
- auto &DL = Lp->getHeader()->getDataLayout();
- Type *IdxTy = DL.getIndexType(PtrExpr->getType());
- const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
@@ -250,9 +267,11 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
unsigned DepSetId, unsigned ASId,
PredicatedScalarEvolution &PSE,
bool NeedsFreeze) {
- const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
+ const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
+ const SCEV *BTC = PSE.getBackedgeTakenCount();
const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
- Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE(), &DC.getPointerBounds());
+ Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
+ &DC.getPointerBounds());
assert(!isa<SCEVCouldNotCompute>(ScStart) &&
!isa<SCEVCouldNotCompute>(ScEnd) &&
"must be able to compute both start and end expressions");
@@ -1907,11 +1926,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
// required for correctness.
if (SE.isLoopInvariant(Src, InnermostLoop) ||
SE.isLoopInvariant(Sink, InnermostLoop)) {
- const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
+ const SCEV *BTC = PSE.getBackedgeTakenCount();
+ const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
- InnermostLoop, Src, ATy, MaxBECount, PSE.getSE(), &PointerBounds);
+ InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC, PSE.getSE(),
+ &PointerBounds);
const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
- InnermostLoop, Sink, BTy, MaxBECount, PSE.getSE(), &PointerBounds);
+ InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC, PSE.getSE(),
+ &PointerBounds);
if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
!isa<SCEVCouldNotCompute>(SrcEnd_) &&
!isa<SCEVCouldNotCompute>(SinkStart_) &&
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-backedge-taken-count-wrapping.ll b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-backedge-taken-count-wrapping.ll
new file mode 100644
index 0000000000000..d58dd38d9fef8
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-backedge-taken-count-wrapping.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; Note: The datalayout for the test specifies a 32-bit index type.
+
+; No UB: accesses the last valid byte; the pointer one past the object
+; doesn't wrap (%p + 2147483647).
+define void @pointer_after_object_does_not_wrap(i32 %y, ptr %s, ptr %p) {
+; CHECK-LABEL: 'pointer_after_object_does_not_wrap'
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
+; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep1.iv = getelementptr inbounds i8, ptr %s, i32 %iv
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP1]]:
+; CHECK-NEXT: (Low: (%y + %p) High: (2147483647 + %p))
+; CHECK-NEXT: Member: {(%y + %p),+,1}<nw><%loop>
+; CHECK-NEXT: Group [[GRP2]]:
+; CHECK-NEXT: (Low: (%y + %s) High: (2147483647 + %s))
+; CHECK-NEXT: Member: {(%y + %s),+,1}<nw><%loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ %y, %entry ], [ %iv.next, %loop ]
+ %gep1.iv = getelementptr inbounds i8, ptr %s, i32 %iv
+ %load = load i8, ptr %gep1.iv, align 4
+ %gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
+ store i8 %load, ptr %gep2.iv, align 4
+ %iv.next = add nsw i32 %iv, 1
+ %c.2 = icmp slt i32 %iv.next, 2147483647
+ br i1 %c.2, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; UB: accesses %p + 2147483646 and %p + 2147483647.
+; The pointer one past the object would wrap (signed).
+define void @pointer_after_object_would_wrap(i32 %y, ptr %s, ptr %p) {
+; CHECK-LABEL: 'pointer_after_object_would_wrap'
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP3:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
+; CHECK-NEXT: Against group ([[GRP4:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep1.iv = getelementptr inbounds i8, ptr %s, i32 %iv
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP3]]:
+; CHECK-NEXT: (Low: (%y + %p) High: (-2147483648 + %p))
+; CHECK-NEXT: Member: {(%y + %p),+,1}<nw><%loop>
+; CHECK-NEXT: Group [[GRP4]]:
+; CHECK-NEXT: (Low: (%y + %s) High: (-2147483648 + %s))
+; CHECK-NEXT: Member: {(%y + %s),+,1}<nw><%loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ %y, %entry ], [ %iv.next, %loop ]
+ %gep1.iv = getelementptr inbounds i8, ptr %s, i32 %iv
+ %load = load i16, ptr %gep1.iv, align 4
+ %gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
+ store i16 %load, ptr %gep2.iv, align 4
+ %iv.next = add nsw i32 %iv, 1
+ %c.2 = icmp slt i32 %iv.next, 2147483647
+ br i1 %c.2, label %loop, label %exit
+
+exit:
+ ret void
+}
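
(Editorial note: with the 32-bit index type from the datalayout above, the
last i16 store touches bytes %p + 2147483646 and %p + 2147483647, so the
one-past-the-end bound is %p + 2147483648 = %p + 2^31, which is INT32_MIN
in an i32 index -- hence the wrapped bound High: (-2147483648 + %p) in the
CHECK lines.)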
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
index 906bf5ce845f1..f6a45f878fe8f 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
@@ -3,7 +3,6 @@
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
-; FIXME: Start == End for access group with AddRec.
define void @runtime_checks_with_symbolic_max_btc_neg_1(ptr %P, ptr %S, i32 %x, i32 %y) {
; CHECK-LABEL: 'runtime_checks_with_symbolic_max_btc_neg_1'
; CHECK-NEXT: loop:
@@ -17,7 +16,7 @@ define void @runtime_checks_with_symbolic_max_btc_neg_1(ptr %P, ptr %S, i32 %x,
; CHECK-NEXT: ptr %S
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: ((4 * %y) + %P) High: ((4 * %y) + %P))
+; CHECK-NEXT: (Low: ((4 * %y) + %P) High: -1)
; CHECK-NEXT: Member: {((4 * %y) + %P),+,4}<%loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %S High: (4 + %S))
@@ -44,7 +43,6 @@ exit:
ret void
}
-; FIXME: Start > End for access group with AddRec.
define void @runtime_check_with_symbolic_max_btc_neg_2(ptr %P, ptr %S, i32 %x, i32 %y) {
; CHECK-LABEL: 'runtime_check_with_symbolic_max_btc_neg_2'
; CHECK-NEXT: loop:
@@ -58,7 +56,7 @@ define void @runtime_check_with_symbolic_max_btc_neg_2(ptr %P, ptr %S, i32 %x, i
; CHECK-NEXT: ptr %S
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: ((4 * %y) + %P) High: (-4 + (4 * %y) + %P))
+; CHECK-NEXT: (Low: ((4 * %y) + %P) High: -1)
; CHECK-NEXT: Member: {((4 * %y) + %P),+,4}<%loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %S High: (4 + %S))
From 4c5d9ed5a48a5282f298338dd4ee7af50f11b75f Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 20 Feb 2025 21:46:11 +0100
Subject: [PATCH 2/7] !fix formatting
---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 53d78ff7327ac..27aa994cdbf7b 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -269,9 +269,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
bool NeedsFreeze) {
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
const SCEV *BTC = PSE.getBackedgeTakenCount();
- const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
- Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
- &DC.getPointerBounds());
+ const auto &[ScStart, ScEnd] =
+ getStartAndEndForAccess(Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC,
+ PSE.getSE(), &DC.getPointerBounds());
assert(!isa<SCEVCouldNotCompute>(ScStart) &&
!isa<SCEVCouldNotCompute>(ScEnd) &&
"must be able to compute both start and end expressions");
@@ -1928,12 +1928,12 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
SE.isLoopInvariant(Sink, InnermostLoop)) {
const SCEV *BTC = PSE.getBackedgeTakenCount();
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
- const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
- InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC, PSE.getSE(),
- &PointerBounds);
- const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
- InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC, PSE.getSE(),
- &PointerBounds);
+ const auto &[SrcStart_, SrcEnd_] =
+ getStartAndEndForAccess(InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC,
+ PSE.getSE(), &PointerBounds);
+ const auto &[SinkStart_, SinkEnd_] =
+ getStartAndEndForAccess(InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC,
+ PSE.getSE(), &PointerBounds);
if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
!isa<SCEVCouldNotCompute>(SrcEnd_) &&
!isa<SCEVCouldNotCompute>(SinkStart_) &&
From 198b3b13eeaf1ea814560c78b6561548ff9bae09 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 24 Feb 2025 11:13:19 +0000
Subject: [PATCH 3/7] !fixup fix isDereferenceableAndAlignedInLoop.
---
.../llvm/Analysis/LoopAccessAnalysis.h | 7 +-
llvm/lib/Analysis/Loads.cpp | 8 +-
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 6 +-
...able-info-from-assumption-variable-size.ll | 88 +++++++++++++++++--
.../single_early_exit_live_outs.ll | 17 ----
5 files changed, 93 insertions(+), 33 deletions(-)
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 191c8399fa49c..1faf279ae2012 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -901,7 +901,10 @@ LLVM_ABI bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
LLVM_ABI bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
ScalarEvolution &SE, bool CheckType = true);
-/// Calculate Start and End points of memory access.
+/// Calculate Start and End points of a memory access, using the exact
+/// backedge-taken count \p BTC if computable, or the maximum backedge-taken
+/// count \p MaxBTC otherwise.
+///
/// Let's assume A is the first access and B is a memory access on N-th loop
/// iteration. Then B is calculated as:
/// B = A + Step*N .
@@ -916,7 +919,7 @@ LLVM_ABI bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
LLVM_ABI std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
- const SCEV *SymbolicMaxBTC, ScalarEvolution *SE,
+ const SCEV *MaxBTC, ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
std::pair<const SCEV *, const SCEV *>> *PointerBounds);
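
As an editorial illustration of the no-conflict condition restated in the
doc comment above (bounds invented for the example):

  // P1 = [0x100, 0x140), P2 = [0x140, 0x180): adjacent, disjoint intervals.
  bool NoConflict = (0x140 >= 0x140) || (0x100 >= 0x180); // true, no overlap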
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 249b3916f527b..41681222d57a3 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -319,14 +319,14 @@ bool llvm::isDereferenceableAndAlignedInLoop(
const SCEV *MaxBECount =
Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
: SE.getConstantMaxBackedgeTakenCount(L);
- const SCEV *SymbolicMaxBECount =
- Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
- : SE.getConstantMaxBackedgeTakenCount(L);
+ const SCEV *BECount = Predicates
+ ? SE.getPredicatedBackedgeTakenCount(L, *Predicates)
+ : SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(MaxBECount))
return false;
const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
- L, PtrScev, LI->getType(), MaxBECount, SymbolicMaxBECount, &SE, nullptr);
+ L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr);
if (isa<SCEVCouldNotCompute>(AccessStart) ||
isa<SCEVCouldNotCompute>(AccessEnd))
return false;
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 27aa994cdbf7b..ed78e89614626 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -190,7 +190,7 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
- const SCEV *SymbolicMaxBTC, ScalarEvolution *SE,
+ const SCEV *MaxBTC, ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
@@ -226,7 +226,9 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
// will get incremented by EltSize before returning, so this effectively
// sets ScEnd to unsigned max. Note that LAA separately checks that
// accesses cannot wrap, so unsigned max represents an upper bound.
- ScEnd = AR->evaluateAtIteration(SymbolicMaxBTC, *SE);
+ // TODO: Use additional information to determine no-wrap, including
+ // size/dereferenceability info from the accessed object.
+ ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
if (!SE->isKnownNonNegative(SE->getMinusSCEV(ScEnd, ScStart)))
ScEnd = SE->getNegativeSCEV(
SE->getAddExpr(EltSizeSCEV, SE->getOne(EltSizeSCEV->getType())));
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
index 6b46197295e32..7a4a33a9d880b 100644
--- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
@@ -22,8 +22,26 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(ptr
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i8> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i8> poison, i8 [[TMP17]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i8> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i8> [[TMP9]], i8 [[TMP13]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i8> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i8> [[WIDE_LOAD]], <2 x i8> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
@@ -105,8 +123,26 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(pt
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
@@ -189,8 +225,26 @@ define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_acc
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
@@ -271,8 +325,26 @@ define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_ac
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index ec1e8fa1e1b33..1912e7ad73e23 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -2208,20 +2208,3 @@ attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
; CHECK: [[LOOP35]] = distinct !{[[LOOP35]], [[META2]], [[META1]]}
; CHECK: [[LOOP36]] = distinct !{[[LOOP36]], [[META1]], [[META2]]}
; CHECK: [[LOOP37]] = distinct !{[[LOOP37]], [[META2]], [[META1]]}
-; CHECK: [[LOOP38]] = distinct !{[[LOOP38]], [[META1]], [[META2]]}
-; CHECK: [[LOOP39]] = distinct !{[[LOOP39]], [[META2]], [[META1]]}
-; CHECK: [[LOOP40]] = distinct !{[[LOOP40]], [[META1]], [[META2]]}
-; CHECK: [[LOOP41]] = distinct !{[[LOOP41]], [[META2]], [[META1]]}
-; CHECK: [[LOOP42]] = distinct !{[[LOOP42]], [[META1]], [[META2]]}
-; CHECK: [[LOOP43]] = distinct !{[[LOOP43]], [[META2]], [[META1]]}
-; CHECK: [[LOOP44]] = distinct !{[[LOOP44]], [[META1]], [[META2]]}
-; CHECK: [[LOOP45]] = distinct !{[[LOOP45]], [[META2]], [[META1]]}
-; CHECK: [[LOOP46]] = distinct !{[[LOOP46]], [[META1]], [[META2]]}
-; CHECK: [[LOOP47]] = distinct !{[[LOOP47]], [[META2]], [[META1]]}
-; CHECK: [[LOOP48]] = distinct !{[[LOOP48]], [[META1]], [[META2]]}
-; CHECK: [[LOOP49]] = distinct !{[[LOOP49]], [[META2]], [[META1]]}
-; CHECK: [[LOOP50]] = distinct !{[[LOOP50]], [[META1]], [[META2]]}
-; CHECK: [[LOOP51]] = distinct !{[[LOOP51]], [[META2]], [[META1]]}
-; CHECK: [[LOOP52]] = distinct !{[[LOOP52]], [[META1]], [[META2]]}
-; CHECK: [[LOOP53]] = distinct !{[[LOOP53]], [[META2]], [[META1]]}
-;.
From 92d8bd6e56d17a93993ac0f85699a72dbf8fac18 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 12 Mar 2025 17:11:13 +0000
Subject: [PATCH 4/7] !fixup check object size to determine no-wrap.
---
llvm/lib/Analysis/Loads.cpp | 2 +
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 57 ++++++++++++++++--
.../early-exit-runtime-checks.ll | 11 ++--
...bolic-max-backedge-taken-count-may-wrap.ll | 12 ++--
.../single_early_exit_live_outs.ll | 58 +------------------
5 files changed, 67 insertions(+), 73 deletions(-)
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 41681222d57a3..880249588f0b2 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -333,6 +333,8 @@ bool llvm::isDereferenceableAndAlignedInLoop(
// Try to get the access size.
const SCEV *PtrDiff = SE.getMinusSCEV(AccessEnd, AccessStart);
+ if (isa<SCEVCouldNotCompute>(PtrDiff))
+ return false;
APInt MaxPtrDiff = SE.getUnsignedRangeMax(PtrDiff);
Value *Base = nullptr;
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index ed78e89614626..9574c30dec62f 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -188,6 +188,51 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
Members.push_back(Index);
}
+/// Return true if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR
+/// evaluated at \p MaxBTC is guaranteed to be inbounds of the accessed object.
+static bool evaluateAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
+ const SCEV *MaxBTC,
+ ScalarEvolution &SE,
+ const DataLayout &DL) {
+ auto *PointerBase = SE.getPointerBase(AR->getStart());
+ auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
+ if (!StartPtr)
+ return false;
+ bool CheckForNonNull, CheckForFreed;
+ uint64_t DerefBytes = StartPtr->getValue()->getPointerDereferenceableBytes(
+ DL, CheckForNonNull, CheckForFreed);
+
+ if (CheckForNonNull || CheckForFreed)
+ return false;
+
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
+ Step = SE.getNoopOrSignExtend(Step, WiderTy);
+ MaxBTC = SE.getNoopOrSignExtend(MaxBTC, WiderTy);
+ if (SE.isKnownPositive(Step)) {
+ // For positive steps, check if (AR->getStart() - StartPtr) + MaxBTC <=
+ // DerefBytes / Step
+ return SE.isKnownPredicate(
+ CmpInst::ICMP_ULE,
+ SE.getAddExpr(SE.getMinusSCEV(AR->getStart(), StartPtr), MaxBTC),
+ SE.getUDivExpr(SE.getConstant(WiderTy, DerefBytes), Step));
+ }
+ if (SE.isKnownNegative(Step)) {
+ // For negative steps, check using StartOffset == AR->getStart() - StartPtr:
+ // * StartOffset >= MaxBTC * -Step
+ // * AND StartOffset <= DerefBytes / -Step
+ auto *StartOffset = SE.getMinusSCEV(AR->getStart(), StartPtr);
+ return SE.isKnownPredicate(
+ CmpInst::ICMP_UGE, StartOffset,
+ SE.getMulExpr(MaxBTC, SE.getNegativeSCEV(Step))) &&
+ SE.isKnownPredicate(
+ CmpInst::ICMP_ULE, StartOffset,
+ SE.getUDivExpr(SE.getConstant(WiderTy, DerefBytes),
+ SE.getNegativeSCEV(Step)));
+ }
+ return false;
+}
+
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
const SCEV *MaxBTC, ScalarEvolution *SE,
@@ -226,12 +271,12 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
// will get incremented by EltSize before returning, so this effectively
// sets ScEnd to unsigned max. Note that LAA separately checks that
// accesses cannot wrap, so unsigned max represents an upper bound.
- // TODO: Use additional information to determine no-wrap, including
- // size/dereferenceability info from the accessed object.
- ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
- if (!SE->isKnownNonNegative(SE->getMinusSCEV(ScEnd, ScStart)))
- ScEnd = SE->getNegativeSCEV(
- SE->getAddExpr(EltSizeSCEV, SE->getOne(EltSizeSCEV->getType())));
+ ScEnd = SE->getAddExpr(
+ SE->getNegativeSCEV(EltSizeSCEV),
+ SE->getSCEV(ConstantExpr::getIntToPtr(
+ ConstantInt::get(EltSizeSCEV->getType(), -1), AR->getType())));
+ if (evaluateAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, *SE, DL))
+ ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
}
const SCEV *Step = AR->getStepRecurrence(*SE);
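
As an editorial aside, a worked instance of the positive-step check in
evaluateAddRecAtMaxBTCWillNotWrap above (all numbers invented):

  // StartPtr is dereferenceable(4000); AR = {StartPtr + 40,+,4}; MaxBTC = 100.
  // StartOffset = 40, DerefBytes / Step = 4000 / 4 = 1000.
  // StartOffset + MaxBTC = 140 <= 1000, so the check succeeds; the last
  // one-past-the-end offset, 40 + 4 * 100 + 4 = 444, is within 4000 bytes.
  // The check is conservative: with Step >= 1 and the inequality above,
  // StartOffset + Step * MaxBTC <= Step * (StartOffset + MaxBTC) <= DerefBytes.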
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
index e235b73daa6a7..3ddcf9aebeebd 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
@@ -75,7 +75,7 @@ define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_not
; CHECK-NEXT: (Low: %B High: (2000 + %B))
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
; CHECK-NEXT: Group GRP1:
-; CHECK-NEXT: (Low: %A High: (2000 + %A))
+; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
@@ -189,9 +189,9 @@ define i32 @all_exits_dominate_latch_countable_exits_at_most_1000_iterations_not
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: %B High: (4004 + %B))
+; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
; CHECK-NEXT: Group GRP1:
-; CHECK-NEXT: (Low: %A High: (4004 + %A))
+; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
@@ -233,6 +233,7 @@ e.2:
ret i32 2
}
+
define i32 @not_all_exits_dominate_latch(ptr %A, ptr %B) {
; CHECK-LABEL: 'not_all_exits_dominate_latch'
; CHECK-NEXT: loop.header:
@@ -345,10 +346,10 @@ define i32 @b3_does_not_dominate_latch_not_known_deref(ptr %A, ptr %B) {
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: %B High: (4004 + %B))
+; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
; CHECK-NEXT: Group GRP1:
-; CHECK-NEXT: (Low: %A High: (4004 + %A))
+; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
index f6a45f878fe8f..7aa4ebf59217c 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
@@ -3,6 +3,7 @@
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+; FIXME: Start == End for access group with AddRec.
define void @runtime_checks_with_symbolic_max_btc_neg_1(ptr %P, ptr %S, i32 %x, i32 %y) {
; CHECK-LABEL: 'runtime_checks_with_symbolic_max_btc_neg_1'
; CHECK-NEXT: loop:
@@ -16,7 +17,7 @@ define void @runtime_checks_with_symbolic_max_btc_neg_1(ptr %P, ptr %S, i32 %x,
; CHECK-NEXT: ptr %S
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: ((4 * %y) + %P) High: -1)
+; CHECK-NEXT: (Low: ((4 * %y) + %P) High: inttoptr (i32 -1 to ptr))
; CHECK-NEXT: Member: {((4 * %y) + %P),+,4}<%loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %S High: (4 + %S))
@@ -43,6 +44,7 @@ exit:
ret void
}
+; FIXME: Start > End for access group with AddRec.
define void @runtime_check_with_symbolic_max_btc_neg_2(ptr %P, ptr %S, i32 %x, i32 %y) {
; CHECK-LABEL: 'runtime_check_with_symbolic_max_btc_neg_2'
; CHECK-NEXT: loop:
@@ -56,7 +58,7 @@ define void @runtime_check_with_symbolic_max_btc_neg_2(ptr %P, ptr %S, i32 %x, i
; CHECK-NEXT: ptr %S
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: ((4 * %y) + %P) High: -1)
+; CHECK-NEXT: (Low: ((4 * %y) + %P) High: inttoptr (i32 -1 to ptr))
; CHECK-NEXT: Member: {((4 * %y) + %P),+,4}<%loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %S High: (4 + %S))
@@ -135,8 +137,8 @@ exit:
ret i32 %res
}
-; FIXME: evaluating at symbolic max BTC wraps around to a positive
-; offset: (2 + (2 * %y) + %P)
+; Evaluating at symbolic max BTC wraps around to a positive
+; offset: (2 + (2 * %y) + %P).
define void @runtime_check_with_symbolic_max_wraps_to_positive_offset(ptr %P, ptr %S, i32 %x, i32 %y) {
; CHECK-LABEL: 'runtime_check_with_symbolic_max_wraps_to_positive_offset'
; CHECK-NEXT: loop:
@@ -150,7 +152,7 @@ define void @runtime_check_with_symbolic_max_wraps_to_positive_offset(ptr %P, pt
; CHECK-NEXT: ptr %S
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: ((2 * %y) + %P) High: (2 + (2 * %y) + %P))
+; CHECK-NEXT: (Low: ((2 * %y) + %P) High: inttoptr (i32 -1 to ptr))
; CHECK-NEXT: Member: {((2 * %y) + %P),+,2}<%loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %S High: (4 + %S))
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index 1912e7ad73e23..bd3d7f8f298a1 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -1971,35 +1971,6 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX1]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 -3
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
-; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 -3
-; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1
-; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD2]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <4 x i8> [[REVERSE]], [[REVERSE3]]
-; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX1]], 4
-; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 1020
-; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]]
-; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
-; CHECK: middle.split:
-; CHECK-NEXT: br i1 [[TMP9]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
-; CHECK: middle.block:
-; CHECK-NEXT: br i1 false, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
-; CHECK: vector.early.exit:
-; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 true)
-; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = sub i64 1023, [[TMP12]]
-; CHECK-NEXT: br label [[LOOP_END]]
-; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[MIDDLE_BLOCK]] ], [ 1023, [[ENTRY:%.*]] ]
-; CHECK-NEXT: br label [[LOOP1:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
@@ -2011,9 +1982,6 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], -1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_END]], label [[LOOP1]], !llvm.loop [[LOOP51:![0-9]+]]
-; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP1]] ], [ 1024, [[LOOP_INC]] ], [ 1024, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -2114,31 +2082,6 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
-; CHECK: middle.split:
-; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
-; CHECK: middle.block:
-; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
-; CHECK: vector.early.exit:
-; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
-; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]]
-; CHECK-NEXT: br label [[LOOP_END]]
-; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
-; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
-; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
-; CHECK: loop.inc:
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP53:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
@@ -2208,3 +2151,4 @@ attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
; CHECK: [[LOOP35]] = distinct !{[[LOOP35]], [[META2]], [[META1]]}
; CHECK: [[LOOP36]] = distinct !{[[LOOP36]], [[META1]], [[META2]]}
; CHECK: [[LOOP37]] = distinct !{[[LOOP37]], [[META2]], [[META1]]}
+
From 2636025468b59373e12c3749c3a6b472bcc9e1f7 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 26 Mar 2025 19:20:26 +0000
Subject: [PATCH 5/7] !fixup update after rebase, extend offset as well.
---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 8 +-
...able-info-from-assumption-variable-size.ll | 24 +++---
.../single_early_exit_live_outs.ll | 77 ++++++++++++++++++-
3 files changed, 93 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 9574c30dec62f..8726407242597 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -212,16 +212,18 @@ static bool evaluateAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
if (SE.isKnownPositive(Step)) {
// For positive steps, check if (AR->getStart() - StartPtr) + MaxBTC <=
// DerefBytes / Step
+ const SCEV *StartOffset = SE.getNoopOrSignExtend(
+ SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
return SE.isKnownPredicate(
- CmpInst::ICMP_ULE,
- SE.getAddExpr(SE.getMinusSCEV(AR->getStart(), StartPtr), MaxBTC),
+ CmpInst::ICMP_ULE, SE.getAddExpr(StartOffset, MaxBTC),
SE.getUDivExpr(SE.getConstant(WiderTy, DerefBytes), Step));
}
if (SE.isKnownNegative(Step)) {
// For negative steps, check using StartOffset == AR->getStart() - StartPtr:
// * StartOffset >= MaxBTC * -Step
// * AND StartOffset <= DerefBytes / -Step
- auto *StartOffset = SE.getMinusSCEV(AR->getStart(), StartPtr);
+ auto *StartOffset = SE.getNoopOrSignExtend(
+ SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
return SE.isKnownPredicate(
CmpInst::ICMP_UGE, StartOffset,
SE.getMulExpr(MaxBTC, SE.getNegativeSCEV(Step))) &&
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
index 7a4a33a9d880b..bfa38c8bee503 100644
--- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
@@ -16,8 +16,8 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(ptr
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1
@@ -46,7 +46,7 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(ptr
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <2 x i8> [[PREDPHI]], ptr [[TMP7]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -117,8 +117,8 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(pt
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
@@ -147,7 +147,7 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(pt
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -219,8 +219,8 @@ define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_acc
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
@@ -249,7 +249,7 @@ define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_acc
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -319,8 +319,8 @@ define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_ac
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
@@ -349,7 +349,7 @@ define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_ac
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index bd3d7f8f298a1..dc8a974de4f87 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -1971,6 +1971,36 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX1]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 -3
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD2]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[REVERSE]], [[REVERSE3]]
+; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX1]], 4
+; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
+; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 1020
+; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 false, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP7]], i1 true)
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = sub i64 1023, [[TMP12]]
+; CHECK-NEXT: br label [[LOOP_END]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[MIDDLE_BLOCK]] ], [ 1023, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
@@ -1982,6 +2012,9 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], -1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_END]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 1024, [[LOOP_INC]] ], [ 1024, [[MIDDLE_BLOCK]] ], [ [[TMP13]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -2082,6 +2115,31 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[TMP11]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]]
+; CHECK-NEXT: br label [[LOOP_END]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP53:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
@@ -2112,6 +2170,7 @@ declare i32 @foo(i32) readonly
declare <vscale x 4 x i32> @foo_vec(<vscale x 4 x i32>)
attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
+
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -2151,4 +2210,20 @@ attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
; CHECK: [[LOOP35]] = distinct !{[[LOOP35]], [[META2]], [[META1]]}
; CHECK: [[LOOP36]] = distinct !{[[LOOP36]], [[META1]], [[META2]]}
; CHECK: [[LOOP37]] = distinct !{[[LOOP37]], [[META2]], [[META1]]}
-
+; CHECK: [[LOOP38]] = distinct !{[[LOOP38]], [[META1]], [[META2]]}
+; CHECK: [[LOOP39]] = distinct !{[[LOOP39]], [[META2]], [[META1]]}
+; CHECK: [[LOOP40]] = distinct !{[[LOOP40]], [[META1]], [[META2]]}
+; CHECK: [[LOOP41]] = distinct !{[[LOOP41]], [[META2]], [[META1]]}
+; CHECK: [[LOOP42]] = distinct !{[[LOOP42]], [[META1]], [[META2]]}
+; CHECK: [[LOOP43]] = distinct !{[[LOOP43]], [[META2]], [[META1]]}
+; CHECK: [[LOOP44]] = distinct !{[[LOOP44]], [[META1]], [[META2]]}
+; CHECK: [[LOOP45]] = distinct !{[[LOOP45]], [[META2]], [[META1]]}
+; CHECK: [[LOOP46]] = distinct !{[[LOOP46]], [[META1]], [[META2]]}
+; CHECK: [[LOOP47]] = distinct !{[[LOOP47]], [[META2]], [[META1]]}
+; CHECK: [[LOOP48]] = distinct !{[[LOOP48]], [[META1]], [[META2]]}
+; CHECK: [[LOOP49]] = distinct !{[[LOOP49]], [[META2]], [[META1]]}
+; CHECK: [[LOOP50]] = distinct !{[[LOOP50]], [[META1]], [[META2]]}
+; CHECK: [[LOOP51]] = distinct !{[[LOOP51]], [[META2]], [[META1]]}
+; CHECK: [[LOOP52]] = distinct !{[[LOOP52]], [[META1]], [[META2]]}
+; CHECK: [[LOOP53]] = distinct !{[[LOOP53]], [[META2]], [[META1]]}
+;.
>From 41f58ca60f397572767c050e0bdde507927c270f Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 16 Jun 2025 10:45:14 +0100
Subject: [PATCH 6/7] !fixup address comments, thanks
---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 101 ++++++++++++------
.../early-exit-runtime-checks.ll | 12 +--
...aluate-at-backedge-taken-count-wrapping.ll | 16 +--
...bolic-max-backedge-taken-count-may-wrap.ll | 2 -
...able-info-from-assumption-variable-size.ll | 40 +++----
.../single_early_exit_live_outs.ll | 46 ++++----
6 files changed, 126 insertions(+), 91 deletions(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 8726407242597..ccd71d0990c83 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -188,12 +188,31 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
Members.push_back(Index);
}
+/// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
+/// return nullptr.
+static const SCEV *addSCEVOverflow(const SCEV *A, const SCEV *B,
+ ScalarEvolution &SE) {
+ if (!SE.willNotOverflow(Instruction::Add, false, A, B))
+ return nullptr;
+ return SE.getAddExpr(A, B);
+}
+
+/// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
+/// return nullptr.
+static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
+ ScalarEvolution &SE) {
+ if (!SE.willNotOverflow(Instruction::Mul, false, A, B))
+ return nullptr;
+ return SE.getMulExpr(A, B);
+}
+
/// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at
/// \p MaxBTC is guaranteed inbounds of the accessed object.
-static bool evaluateAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
- const SCEV *MaxBTC,
- ScalarEvolution &SE,
- const DataLayout &DL) {
+static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
+ const SCEV *MaxBTC,
+ const SCEV *EltSize,
+ ScalarEvolution &SE,
+ const DataLayout &DL) {
auto *PointerBase = SE.getPointerBase(AR->getStart());
auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase);
if (!StartPtr)
@@ -208,29 +227,45 @@ static bool evaluateAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
const SCEV *Step = AR->getStepRecurrence(SE);
Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
Step = SE.getNoopOrSignExtend(Step, WiderTy);
- MaxBTC = SE.getNoopOrSignExtend(MaxBTC, WiderTy);
+ MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);
+
+ // For the computations below, make sure they don't unsigned wrap.
+ if (!SE.isKnownPredicate(CmpInst::ICMP_UGE, AR->getStart(), StartPtr))
+ return false;
+ const SCEV *StartOffset = SE.getNoopOrSignExtend(
+ SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
+
+ const SCEV *OffsetAtLastIter =
+ mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, false), SE);
+ if (!OffsetAtLastIter)
+ return false;
+
+ const SCEV *OffsetLastAccessedByte = addSCEVOverflow(
+ OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE);
+ if (!OffsetLastAccessedByte)
+ return false;
+
if (SE.isKnownPositive(Step)) {
- // For positive steps, check if (AR->getStart() - StartPtr) + MaxBTC <=
- // DerefBytes / Step
- const SCEV *StartOffset = SE.getNoopOrSignExtend(
- SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
- return SE.isKnownPredicate(
- CmpInst::ICMP_ULE, SE.getAddExpr(StartOffset, MaxBTC),
- SE.getUDivExpr(SE.getConstant(WiderTy, DerefBytes), Step));
+ // For positive steps, check if
+ // (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
+ // while making sure none of the computations unsigned wrap themselves.
+ const SCEV *LastAccessedByte =
+ addSCEVOverflow(StartOffset, OffsetLastAccessedByte, SE);
+ if (!LastAccessedByte)
+ return false;
+ return SE.isKnownPredicate(CmpInst::ICMP_ULE, LastAccessedByte,
+ SE.getConstant(WiderTy, DerefBytes));
}
+
if (SE.isKnownNegative(Step)) {
- // For negative steps, check using StartOffset == AR->getStart() - StartPtr:
- // * StartOffset >= MaxBTC * Step
- // * AND StartOffset <= DerefBytes / Step
- auto *StartOffset = SE.getNoopOrSignExtend(
- SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
- return SE.isKnownPredicate(
- CmpInst::ICMP_UGE, StartOffset,
- SE.getMulExpr(MaxBTC, SE.getNegativeSCEV(Step))) &&
- SE.isKnownPredicate(
- CmpInst::ICMP_ULE, StartOffset,
- SE.getUDivExpr(SE.getConstant(WiderTy, DerefBytes),
- SE.getNegativeSCEV(Step)));
+ // For negative steps check if
+ // * StartOffset >= (MaxBTC * |Step| + EltSize)
+ // * StartOffset <= DerefBytes.
+ return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset,
+ OffsetLastAccessedByte) &&
+ SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset,
+ SE.getConstant(WiderTy, DerefBytes));
}
return false;
}
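
The chained mulSCEVOverflow/addSCEVOverflow calls above are the SCEV
analogue of ordinary checked unsigned arithmetic. A standalone C++ sketch of
the same guarded end-offset computation (not the LLVM API; all names here
are hypothetical):

#include <cstdint>
#include <optional>

// Returns StartOffset + MaxBTC * Step + EltSize only if no intermediate
// step wraps unsigned 64-bit arithmetic; nullopt mirrors the nullptr
// returned by the SCEV helpers.
static std::optional<uint64_t> lastAccessedByte(uint64_t StartOffset,
                                                uint64_t MaxBTC, uint64_t Step,
                                                uint64_t EltSize) {
  uint64_t OffsetAtLastIter, OffsetEndBytes, EndBytes;
  if (__builtin_mul_overflow(MaxBTC, Step, &OffsetAtLastIter))
    return std::nullopt; // MaxBTC * Step wraps (mulSCEVOverflow fails).
  if (__builtin_add_overflow(OffsetAtLastIter, EltSize, &OffsetEndBytes))
    return std::nullopt; // + EltSize wraps (first addSCEVOverflow fails).
  if (__builtin_add_overflow(StartOffset, OffsetEndBytes, &EndBytes))
    return std::nullopt; // + StartOffset wraps (second addSCEVOverflow fails).
  return EndBytes; // Safe to compare against DerefBytes.
}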
@@ -271,14 +306,18 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
// than the start of the AddRec due to wrapping (for example consider
// MaxBTC = -2). If that's the case, set ScEnd to -(EltSize + 1). ScEnd
// will get incremented by EltSize before returning, so this effectively
- // sets ScEnd to unsigned max. Note that LAA separately checks that
- // accesses cannot not wrap, so unsigned max represents an upper bound.
- ScEnd = SE->getAddExpr(
- SE->getNegativeSCEV(EltSizeSCEV),
- SE->getSCEV(ConstantExpr::getIntToPtr(
- ConstantInt::get(EltSizeSCEV->getType(), -1), AR->getType())));
- if (evaluateAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, *SE, DL))
+ // sets ScEnd to the maximum unsigned value for the type. Note that LAA
+ // separately checks that accesses cannot wrap, so unsigned max
+ // represents an upper bound.
+ if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE,
+ DL)) {
ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
+ } else {
+ ScEnd = SE->getAddExpr(
+ SE->getNegativeSCEV(EltSizeSCEV),
+ SE->getSCEV(ConstantExpr::getIntToPtr(
+ ConstantInt::get(EltSizeSCEV->getType(), -1), AR->getType())));
+ }
}
const SCEV *Step = AR->getStepRecurrence(*SE);
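
To see why the guard is needed, consider a 32-bit induction variable with a
symbolic max BTC of -2, i.e. UINT32_MAX - 1: evaluating the AddRec at that
count wraps and yields an end below the start. A standalone sketch (not part
of the patch; the values are made up):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Start = 100;                 // byte offset of the first access
  uint32_t MaxBTC = UINT32_MAX - 1;     // symbolic max BTC of -2
  uint32_t Step = 1;                    // bytes advanced per iteration
  uint32_t End = Start + MaxBTC * Step; // wraps: End == 98, below Start
  printf("Start=%u End=%u\n", Start, End);
  return 0;
}

In exactly this situation ScEnd now falls back to the clamped bound instead
of the wrapped evaluateAtIteration result.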
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
index 3ddcf9aebeebd..92703edc3236e 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
@@ -72,7 +72,7 @@ define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_not
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: %B High: (2000 + %B))
+; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
@@ -130,10 +130,10 @@ define i32 @all_exits_dominate_latch_countable_exits_at_most_1000_iterations_kno
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: %B High: (4004 + %B))
+; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
; CHECK-NEXT: Group GRP1:
-; CHECK-NEXT: (Low: %A High: (4004 + %A))
+; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
@@ -188,8 +188,8 @@ define i32 @all_exits_dominate_latch_countable_exits_at_most_1000_iterations_not
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: %B High: (4004 + %B))
+; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
@@ -291,10 +291,10 @@ define i32 @b3_does_not_dominate_latch_known_deref(ptr dereferenceable(4000) %A,
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: %B High: (4004 + %B))
+; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
; CHECK-NEXT: Group GRP1:
-; CHECK-NEXT: (Low: %A High: (4004 + %A))
+; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
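
The "High: inttoptr (i64 -1 to ptr)" bounds above are the clamped fallback:
ScEnd is built as (-EltSize) + inttoptr(-1), and the common + EltSize
adjustment later brings it back to the all-ones address. A hypothetical
arithmetic sketch of that round trip, on plain integers rather than SCEV:

#include <cstdint>
#include <cassert>

int main() {
  uint64_t EltSize = 4;
  // (-EltSize) + (-1) == -(EltSize + 1) in two's complement.
  uint64_t ScEnd = (0 - EltSize) + UINT64_MAX;
  // Adding EltSize back yields unsigned max, the most conservative upper
  // bound for an access LAA has separately proven not to wrap.
  assert(ScEnd + EltSize == UINT64_MAX);
  return 0;
}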
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-backedge-taken-count-wrapping.ll b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-backedge-taken-count-wrapping.ll
index d58dd38d9fef8..54d671c62dbf4 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-backedge-taken-count-wrapping.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-backedge-taken-count-wrapping.ll
@@ -14,15 +14,15 @@ define void @pointer_after_object_does_not_wrap(i32 %y, ptr %s, ptr %p) {
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
-; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]):
+; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
-; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]):
+; CHECK-NEXT: Against group GRP1:
; CHECK-NEXT: %gep1.iv = getelementptr inbounds i8, ptr %s, i32 %iv
; CHECK-NEXT: Grouped accesses:
-; CHECK-NEXT: Group [[GRP1]]:
+; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: (%y + %p) High: (2147483647 + %p))
; CHECK-NEXT: Member: {(%y + %p),+,1}<nw><%loop>
-; CHECK-NEXT: Group [[GRP2]]:
+; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: (%y + %s) High: (2147483647 + %s))
; CHECK-NEXT: Member: {(%y + %s),+,1}<nw><%loop>
; CHECK-EMPTY:
@@ -57,15 +57,15 @@ define void @pointer_after_object_would_wrap(i32 %y, ptr %s, ptr %p) {
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
-; CHECK-NEXT: Comparing group ([[GRP3:0x[0-9a-f]+]]):
+; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
-; CHECK-NEXT: Against group ([[GRP4:0x[0-9a-f]+]]):
+; CHECK-NEXT: Against group GRP1:
; CHECK-NEXT: %gep1.iv = getelementptr inbounds i8, ptr %s, i32 %iv
; CHECK-NEXT: Grouped accesses:
-; CHECK-NEXT: Group [[GRP3]]:
+; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: (%y + %p) High: (-2147483648 + %p))
; CHECK-NEXT: Member: {(%y + %p),+,1}<nw><%loop>
-; CHECK-NEXT: Group [[GRP4]]:
+; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: (%y + %s) High: (-2147483648 + %s))
; CHECK-NEXT: Member: {(%y + %s),+,1}<nw><%loop>
; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
index 7aa4ebf59217c..e319c89577e24 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
@@ -3,7 +3,6 @@
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
-; FIXME: Start == End for access group with AddRec.
define void @runtime_checks_with_symbolic_max_btc_neg_1(ptr %P, ptr %S, i32 %x, i32 %y) {
; CHECK-LABEL: 'runtime_checks_with_symbolic_max_btc_neg_1'
; CHECK-NEXT: loop:
@@ -44,7 +43,6 @@ exit:
ret void
}
-; FIXME: Start > End for access group with AddRec.
define void @runtime_check_with_symbolic_max_btc_neg_2(ptr %P, ptr %S, i32 %x, i32 %y) {
; CHECK-LABEL: 'runtime_check_with_symbolic_max_btc_neg_2'
; CHECK-NEXT: loop:
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
index bfa38c8bee503..a3029bab86943 100644
--- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
@@ -16,8 +16,7 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(ptr
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1
@@ -26,7 +25,8 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(ptr
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP0]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP19]]
; CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i8> poison, i8 [[TMP17]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
@@ -35,7 +35,7 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(ptr
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i8> [[TMP9]], i8 [[TMP13]], i32 1
@@ -46,7 +46,7 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(ptr
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <2 x i8> [[PREDPHI]], ptr [[TMP7]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -117,8 +117,7 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(pt
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
@@ -127,7 +126,8 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(pt
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP0]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
@@ -136,7 +136,7 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(pt
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
@@ -147,7 +147,7 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(pt
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -219,8 +219,7 @@ define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_acc
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
@@ -229,7 +228,8 @@ define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_acc
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP0]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
@@ -238,7 +238,7 @@ define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_acc
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
@@ -249,7 +249,7 @@ define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_acc
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -319,8 +319,7 @@ define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_ac
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
@@ -329,7 +328,8 @@ define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_ac
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP0]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
@@ -338,7 +338,7 @@ define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_ac
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
@@ -349,7 +349,7 @@ define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_ac
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP7]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index dc8a974de4f87..ec1e8fa1e1b33 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -1972,35 +1972,34 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX1]]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 -3
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 -3
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3
-; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 -3
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1
; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD2]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[REVERSE]], [[REVERSE3]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <4 x i8> [[REVERSE]], [[REVERSE3]]
; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX1]], 4
-; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
-; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 1020
-; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
+; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 1020
+; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
; CHECK: middle.split:
-; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 false, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
-; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP7]], i1 true)
-; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = sub i64 1023, [[TMP12]]
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 true)
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = sub i64 1023, [[TMP12]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[MIDDLE_BLOCK]] ], [ 1023, [[ENTRY:%.*]] ]
-; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK-NEXT: br label [[LOOP1:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
@@ -2012,9 +2011,9 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], -1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_END]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_END]], label [[LOOP1]], !llvm.loop [[LOOP51:![0-9]+]]
; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 1024, [[LOOP_INC]] ], [ 1024, [[MIDDLE_BLOCK]] ], [ [[TMP13]], [[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP1]] ], [ 1024, [[LOOP_INC]] ], [ 1024, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -2121,8 +2120,8 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
-; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
-; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[TMP11]]
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
@@ -2170,7 +2169,6 @@ declare i32 @foo(i32) readonly
declare <vscale x 4 x i32> @foo_vec(<vscale x 4 x i32>)
attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
-
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
>From fc91973e6cc808f96559ed190c291b11d8ca9cc1 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 18 Jun 2025 18:30:57 +0100
Subject: [PATCH 7/7] !fixup address comments, thanks
---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 41 ++++++++++++------------
1 file changed, 20 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index ccd71d0990c83..532ea2708ca74 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -189,7 +189,7 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
}
/// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
-/// return nullptr.
+/// return nullptr. \p A and \p B must have the same type.
static const SCEV *addSCEVOverflow(const SCEV *A, const SCEV *B,
ScalarEvolution &SE) {
if (!SE.willNotOverflow(Instruction::Add, false, A, B))
@@ -198,7 +198,7 @@ static const SCEV *addSCEVOverflow(const SCEV *A, const SCEV *B,
}
/// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
-/// return nullptr.
+/// return nullptr. \p A and \p B must have the same type.
static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
ScalarEvolution &SE) {
if (!SE.willNotOverflow(Instruction::Mul, false, A, B))
@@ -225,6 +225,10 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
return false;
const SCEV *Step = AR->getStepRecurrence(SE);
+ bool IsKnownNonNegative = SE.isKnownNonNegative(Step);
+ if (!IsKnownNonNegative && !SE.isKnownNegative(Step))
+ return false;
+
Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
Step = SE.getNoopOrSignExtend(Step, WiderTy);
MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy);
@@ -232,7 +236,7 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
// For the computations below, make sure they don't unsigned wrap.
if (!SE.isKnownPredicate(CmpInst::ICMP_UGE, AR->getStart(), StartPtr))
return false;
- const SCEV *StartOffset = SE.getNoopOrSignExtend(
+ const SCEV *StartOffset = SE.getNoopOrZeroExtend(
SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
const SCEV *OffsetAtLastIter =
@@ -240,34 +244,29 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
if (!OffsetAtLastIter)
return false;
- const SCEV *OffsetLastAccessedByte = addSCEVOverflow(
+ const SCEV *OffsetEndBytes = addSCEVOverflow(
OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE);
- if (!OffsetLastAccessedByte)
+ if (!OffsetEndBytes)
return false;
- if (SE.isKnownPositive(Step)) {
+ if (IsKnownNonNegative) {
// For positive steps, check if
// (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
// while making sure none of the computations unsigned wrap themselves.
- const SCEV *LastAccessedByte =
- addSCEVOverflow(StartOffset, OffsetLastAccessedByte, SE);
- if (!LastAccessedByte)
+ const SCEV *EndBytes = addSCEVOverflow(StartOffset, OffsetEndBytes, SE);
+ if (!EndBytes)
return false;
- return SE.isKnownPredicate(CmpInst::ICMP_ULE, LastAccessedByte,
+ return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes,
SE.getConstant(WiderTy, DerefBytes));
}
- if (SE.isKnownNegative(Step)) {
- // For negative steps check if
- // * StartOffset >= (MaxBTC * |Step| + EltSize)
- // * StartOffset <= DerefBytes.
- return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset,
- OffsetLastAccessedByte) &&
- SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset,
- SE.getConstant(WiderTy, DerefBytes));
- }
- return false;
+ // For negative steps check if
+ // * StartOffset >= (MaxBTC * |Step| + EltSize)
+ // * StartOffset <= DerefBytes.
+ assert(SE.isKnownNegative(Step) && "must be known negative");
+ return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) &&
+ SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset,
+ SE.getConstant(WiderTy, DerefBytes));
}
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
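
Once the SCEV operands are known constants, the negative-step return above
reduces to two ordinary comparisons (the first is signed in the patch; plain
unsigned here for brevity). A worked instance with made-up numbers:

#include <cstdint>
#include <cstdio>

// OffsetEndBytes stands for MaxBTC * |Step| + EltSize, assumed not to wrap.
static bool negStepEvalCannotWrap(uint64_t StartOffset,
                                  uint64_t OffsetEndBytes,
                                  uint64_t DerefBytes) {
  return StartOffset >= OffsetEndBytes && StartOffset <= DerefBytes;
}

int main() {
  // MaxBTC = 998, |Step| = 4, EltSize = 4 => OffsetEndBytes = 3996. With
  // StartOffset = 4000 into a 4000-byte dereferenceable object, both
  // comparisons hold and evaluating at MaxBTC is accepted.
  printf("%d\n", negStepEvalCannotWrap(4000, 3996, 4000));
  return 0;
}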