[llvm] [LAA] Be more careful when evaluating AddRecs at symbolic max BTC. (PR #128061)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 25 05:33:48 PST 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/128061
>From 3ceba4670b9724ff0fb311f9ba7217c9f538a34a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 29 Aug 2024 12:21:20 +0100
Subject: [PATCH 1/3] [LAA] Be more careful when evaluating AddRecs at symbolic
max BTC.
Evaluating AR at the symbolic max BTC may wrap and create an expression
that is less than the start of the AddRec due to wrapping (for example
consider MaxBTC = -2).
If that's the case, set ScEnd to -(EltSize + 1). ScEnd will get
incremented by EltSize before returning, so this effectively
sets ScEnd to unsigned max. Note that LAA separately checks that
accesses cannot wrap, so unsigned max represents an upper bound.
---
.../llvm/Analysis/LoopAccessAnalysis.h | 4 +-
llvm/lib/Analysis/Loads.cpp | 5 +-
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 44 ++++++---
...aluate-at-backedge-taken-count-wrapping.ll | 92 +++++++++++++++++++
...bolic-max-backedge-taken-count-may-wrap.ll | 6 +-
5 files changed, 133 insertions(+), 18 deletions(-)
create mode 100644 llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-backedge-taken-count-wrapping.ll
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index cb6f47e3a76be..09ac3105b83ed 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -871,8 +871,8 @@ bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
- const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
- ScalarEvolution *SE,
+ const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
+ const SCEV *SymbolicMaxBTC, ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
std::pair<const SCEV *, const SCEV *>> *PointerBounds);
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index b461c41d29e84..5a8eedfa261d2 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -319,11 +319,14 @@ bool llvm::isDereferenceableAndAlignedInLoop(
const SCEV *MaxBECount =
Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
: SE.getConstantMaxBackedgeTakenCount(L);
+ const SCEV *SymbolicMaxBECount =
+ Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
+ : SE.getConstantMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(MaxBECount))
return false;
const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
- L, PtrScev, LI->getType(), MaxBECount, &SE, nullptr);
+ L, PtrScev, LI->getType(), MaxBECount, SymbolicMaxBECount, &SE, nullptr);
if (isa<SCEVCouldNotCompute>(AccessStart) ||
isa<SCEVCouldNotCompute>(AccessEnd))
return false;
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index cf3bb6a8eae1c..172630f013b6b 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -189,8 +189,8 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
}
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
- const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
- ScalarEvolution *SE,
+ const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
+ const SCEV *SymbolicMaxBTC, ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
@@ -206,11 +206,31 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
const SCEV *ScStart;
const SCEV *ScEnd;
+ auto &DL = Lp->getHeader()->getDataLayout();
+ Type *IdxTy = DL.getIndexType(PtrExpr->getType());
+ const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
if (SE->isLoopInvariant(PtrExpr, Lp)) {
ScStart = ScEnd = PtrExpr;
} else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
ScStart = AR->getStart();
- ScEnd = AR->evaluateAtIteration(MaxBECount, *SE);
+ if (!isa<SCEVCouldNotCompute>(BTC))
+ // Evaluating AR at an exact BTC is safe: LAA separately checks that
+ // accesses cannot wrap in the loop. If evaluating AR at BTC wraps, then
+ // the loop either triggers UB when executing a memory access with a
+ // poison pointer or the wrapping/poisoned pointer is not used.
+ ScEnd = AR->evaluateAtIteration(BTC, *SE);
+ else {
+ // Evaluating AR at MaxBTC may wrap and create an expression that is less
+ // than the start of the AddRec due to wrapping (for example consider
+ // MaxBTC = -2). If that's the case, set ScEnd to -(EltSize + 1). ScEnd
+ // will get incremented by EltSize before returning, so this effectively
+ // sets ScEnd to unsigned max. Note that LAA separately checks that
+ // accesses cannot not wrap, so unsigned max represents an upper bound.
+ ScEnd = AR->evaluateAtIteration(SymbolicMaxBTC, *SE);
+ if (!SE->isKnownNonNegative(SE->getMinusSCEV(ScEnd, ScStart)))
+ ScEnd = SE->getNegativeSCEV(
+ SE->getAddExpr(EltSizeSCEV, SE->getOne(EltSizeSCEV->getType())));
+ }
const SCEV *Step = AR->getStepRecurrence(*SE);
// For expressions with negative step, the upper bound is ScStart and the
@@ -232,9 +252,6 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
// Add the size of the pointed element to ScEnd.
- auto &DL = Lp->getHeader()->getDataLayout();
- Type *IdxTy = DL.getIndexType(PtrExpr->getType());
- const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
@@ -250,9 +267,11 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
unsigned DepSetId, unsigned ASId,
PredicatedScalarEvolution &PSE,
bool NeedsFreeze) {
- const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
+ const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
+ const SCEV *BTC = PSE.getBackedgeTakenCount();
const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
- Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE(), &DC.getPointerBounds());
+ Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
+ &DC.getPointerBounds());
assert(!isa<SCEVCouldNotCompute>(ScStart) &&
!isa<SCEVCouldNotCompute>(ScEnd) &&
"must be able to compute both start and end expressions");
@@ -1934,11 +1953,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
// required for correctness.
if (SE.isLoopInvariant(Src, InnermostLoop) ||
SE.isLoopInvariant(Sink, InnermostLoop)) {
- const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
+ const SCEV *BTC = PSE.getBackedgeTakenCount();
+ const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
- InnermostLoop, Src, ATy, MaxBECount, PSE.getSE(), &PointerBounds);
+ InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC, PSE.getSE(),
+ &PointerBounds);
const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
- InnermostLoop, Sink, BTy, MaxBECount, PSE.getSE(), &PointerBounds);
+ InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC, PSE.getSE(),
+ &PointerBounds);
if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
!isa<SCEVCouldNotCompute>(SrcEnd_) &&
!isa<SCEVCouldNotCompute>(SinkStart_) &&
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-backedge-taken-count-wrapping.ll b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-backedge-taken-count-wrapping.ll
new file mode 100644
index 0000000000000..d58dd38d9fef8
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-backedge-taken-count-wrapping.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; Note: The datalayout for the test specifies a 32 bit index type.
+
+; No UB: accessing the last valid byte; the pointer past the object
+; doesn't wrap (%p + 2147483647).
+define void @pointer_after_object_does_not_wrap(i32 %y, ptr %s, ptr %p) {
+; CHECK-LABEL: 'pointer_after_object_does_not_wrap'
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
+; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep1.iv = getelementptr inbounds i8, ptr %s, i32 %iv
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP1]]:
+; CHECK-NEXT: (Low: (%y + %p) High: (2147483647 + %p))
+; CHECK-NEXT: Member: {(%y + %p),+,1}<nw><%loop>
+; CHECK-NEXT: Group [[GRP2]]:
+; CHECK-NEXT: (Low: (%y + %s) High: (2147483647 + %s))
+; CHECK-NEXT: Member: {(%y + %s),+,1}<nw><%loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ %y, %entry ], [ %iv.next, %loop ]
+ %gep1.iv = getelementptr inbounds i8 , ptr %s, i32 %iv
+ %load = load i8, ptr %gep1.iv, align 4
+ %gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
+ store i8 %load, ptr %gep2.iv, align 4
+ %iv.next = add nsw i32 %iv, 1
+ %c.2 = icmp slt i32 %iv.next, 2147483647
+ br i1 %c.2, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; UB: accessing %p + 2147483646 and %p + 2147483647.
+; The pointer past the object would wrap in signed arithmetic.
+define void @pointer_after_object_would_wrap(i32 %y, ptr %s, ptr %p) {
+; CHECK-LABEL: 'pointer_after_object_would_wrap'
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP3:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
+; CHECK-NEXT: Against group ([[GRP4:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep1.iv = getelementptr inbounds i8, ptr %s, i32 %iv
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP3]]:
+; CHECK-NEXT: (Low: (%y + %p) High: (-2147483648 + %p))
+; CHECK-NEXT: Member: {(%y + %p),+,1}<nw><%loop>
+; CHECK-NEXT: Group [[GRP4]]:
+; CHECK-NEXT: (Low: (%y + %s) High: (-2147483648 + %s))
+; CHECK-NEXT: Member: {(%y + %s),+,1}<nw><%loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ %y, %entry ], [ %iv.next, %loop ]
+ %gep1.iv = getelementptr inbounds i8 , ptr %s, i32 %iv
+ %load = load i16, ptr %gep1.iv, align 4
+ %gep2.iv = getelementptr inbounds i8, ptr %p, i32 %iv
+ store i16 %load, ptr %gep2.iv, align 4
+ %iv.next = add nsw i32 %iv, 1
+ %c.2 = icmp slt i32 %iv.next, 2147483647
+ br i1 %c.2, label %loop, label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
index dd06cab26d095..0aa74c7b6442b 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
@@ -3,7 +3,6 @@
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
-; FIXME: Start == End for access group with AddRec.
define void @runtime_checks_with_symbolic_max_btc_neg_1(ptr %P, ptr %S, i32 %x, i32 %y) {
; CHECK-LABEL: 'runtime_checks_with_symbolic_max_btc_neg_1'
; CHECK-NEXT: loop:
@@ -17,7 +16,7 @@ define void @runtime_checks_with_symbolic_max_btc_neg_1(ptr %P, ptr %S, i32 %x,
; CHECK-NEXT: ptr %S
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group [[GRP1]]:
-; CHECK-NEXT: (Low: ((4 * %y) + %P) High: ((4 * %y) + %P))
+; CHECK-NEXT: (Low: ((4 * %y) + %P) High: -1)
; CHECK-NEXT: Member: {((4 * %y) + %P),+,4}<%loop>
; CHECK-NEXT: Group [[GRP2]]:
; CHECK-NEXT: (Low: %S High: (4 + %S))
@@ -44,7 +43,6 @@ exit:
ret void
}
-; FIXME: Start > End for access group with AddRec.
define void @runtime_check_with_symbolic_max_btc_neg_2(ptr %P, ptr %S, i32 %x, i32 %y) {
; CHECK-LABEL: 'runtime_check_with_symbolic_max_btc_neg_2'
; CHECK-NEXT: loop:
@@ -58,7 +56,7 @@ define void @runtime_check_with_symbolic_max_btc_neg_2(ptr %P, ptr %S, i32 %x, i
; CHECK-NEXT: ptr %S
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group [[GRP3]]:
-; CHECK-NEXT: (Low: ((4 * %y) + %P) High: (-4 + (4 * %y) + %P))
+; CHECK-NEXT: (Low: ((4 * %y) + %P) High: -1)
; CHECK-NEXT: Member: {((4 * %y) + %P),+,4}<%loop>
; CHECK-NEXT: Group [[GRP4]]:
; CHECK-NEXT: (Low: %S High: (4 + %S))
>From 0d3c6eda5fbf0152c2fb5c97777d76bdd859aed3 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 20 Feb 2025 21:46:11 +0100
Subject: [PATCH 2/3] !fix formatting
---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 172630f013b6b..6dfb980a02e0c 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -269,9 +269,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
bool NeedsFreeze) {
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
const SCEV *BTC = PSE.getBackedgeTakenCount();
- const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
- Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(),
- &DC.getPointerBounds());
+ const auto &[ScStart, ScEnd] =
+ getStartAndEndForAccess(Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC,
+ PSE.getSE(), &DC.getPointerBounds());
assert(!isa<SCEVCouldNotCompute>(ScStart) &&
!isa<SCEVCouldNotCompute>(ScEnd) &&
"must be able to compute both start and end expressions");
@@ -1955,12 +1955,12 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
SE.isLoopInvariant(Sink, InnermostLoop)) {
const SCEV *BTC = PSE.getBackedgeTakenCount();
const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount();
- const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
- InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC, PSE.getSE(),
- &PointerBounds);
- const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
- InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC, PSE.getSE(),
- &PointerBounds);
+ const auto &[SrcStart_, SrcEnd_] =
+ getStartAndEndForAccess(InnermostLoop, Src, ATy, BTC, SymbolicMaxBTC,
+ PSE.getSE(), &PointerBounds);
+ const auto &[SinkStart_, SinkEnd_] =
+ getStartAndEndForAccess(InnermostLoop, Sink, BTy, BTC, SymbolicMaxBTC,
+ PSE.getSE(), &PointerBounds);
if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
!isa<SCEVCouldNotCompute>(SrcEnd_) &&
!isa<SCEVCouldNotCompute>(SinkStart_) &&
>From a5b5a137d79f58f36b59378b03d2f14cdaef1675 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 24 Feb 2025 11:13:19 +0000
Subject: [PATCH 3/3] !fixup fix isDereferenceableAndAlignedInLoop.
---
.../llvm/Analysis/LoopAccessAnalysis.h | 7 +-
llvm/lib/Analysis/Loads.cpp | 8 +-
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 6 +-
...able-info-from-assumption-variable-size.ll | 100 +++++++++++++++---
.../single_early_exit_live_outs.ll | 50 ++-------
5 files changed, 103 insertions(+), 68 deletions(-)
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 09ac3105b83ed..3fe3986380a4e 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -857,7 +857,10 @@ bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
ScalarEvolution &SE, bool CheckType = true);
-/// Calculate Start and End points of memory access.
+/// Calculate Start and End points of memory access using exact backedge taken
+/// count \p BTC if computable or maximum backedge taken count \p MaxBTC
+/// otherwise.
+///
/// Let's assume A is the first access and B is a memory access on N-th loop
/// iteration. Then B is calculated as:
/// B = A + Step*N .
@@ -872,7 +875,7 @@ bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
- const SCEV *SymbolicMaxBTC, ScalarEvolution *SE,
+ const SCEV *MaxBTC, ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
std::pair<const SCEV *, const SCEV *>> *PointerBounds);
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 5a8eedfa261d2..f8d8b6233649c 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -319,14 +319,14 @@ bool llvm::isDereferenceableAndAlignedInLoop(
const SCEV *MaxBECount =
Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
: SE.getConstantMaxBackedgeTakenCount(L);
- const SCEV *SymbolicMaxBECount =
- Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
- : SE.getConstantMaxBackedgeTakenCount(L);
+ const SCEV *BECount = Predicates
+ ? SE.getPredicatedBackedgeTakenCount(L, *Predicates)
+ : SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(MaxBECount))
return false;
const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
- L, PtrScev, LI->getType(), MaxBECount, SymbolicMaxBECount, &SE, nullptr);
+ L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr);
if (isa<SCEVCouldNotCompute>(AccessStart) ||
isa<SCEVCouldNotCompute>(AccessEnd))
return false;
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 6dfb980a02e0c..ed6aab81f7f07 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -190,7 +190,7 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC,
- const SCEV *SymbolicMaxBTC, ScalarEvolution *SE,
+ const SCEV *MaxBTC, ScalarEvolution *SE,
DenseMap<std::pair<const SCEV *, Type *>,
std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
@@ -226,7 +226,9 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
// will get incremented by EltSize before returning, so this effectively
// sets ScEnd to unsigned max. Note that LAA separately checks that
// accesses cannot not wrap, so unsigned max represents an upper bound.
- ScEnd = AR->evaluateAtIteration(SymbolicMaxBTC, *SE);
+ // TODO: Use additional information to determine no-wrap including
+ // size/dereferencability info from the accessed object.
+ ScEnd = AR->evaluateAtIteration(MaxBTC, *SE);
if (!SE->isKnownNonNegative(SE->getMinusSCEV(ScEnd, ScStart)))
ScEnd = SE->getNegativeSCEV(
SE->getAddExpr(EltSizeSCEV, SE->getOne(EltSizeSCEV->getType())));
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
index d1cbe02192e31..bfa38c8bee503 100644
--- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
@@ -16,15 +16,32 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(ptr
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i8> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i8> poison, i8 [[TMP17]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i8> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i8> [[TMP9]], i8 [[TMP13]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i8> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i8> [[WIDE_LOAD]], <2 x i8> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
@@ -100,15 +117,32 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(pt
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
@@ -185,15 +219,32 @@ define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_acc
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
@@ -268,15 +319,32 @@ define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_ac
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index e24c6090b704b..1da251ca41eb4 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -1480,57 +1480,21 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1023, i64 1022, i64 1021, i64 1020>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX1]]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 -3
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
-; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 -3
-; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1
-; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD2]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[REVERSE]], [[REVERSE3]]
-; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX1]], 4
-; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
-; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 1020
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
-; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]]
-; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
-; CHECK: middle.split:
-; CHECK-NEXT: br i1 [[TMP9]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
-; CHECK: middle.block:
-; CHECK-NEXT: br i1 false, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
-; CHECK: vector.early.exit:
-; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label [[LOOP_END]]
-; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[MIDDLE_BLOCK]] ], [ 1023, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 1023, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
-; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], -1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_END]], label [[LOOP]], !llvm.loop [[LOOP37:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_END]], label [[LOOP]]
; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 1024, [[LOOP_INC]] ], [ 1024, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 1024, [[LOOP_INC]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -1585,7 +1549,7 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -1608,7 +1572,7 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP39:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP37:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
@@ -1679,6 +1643,4 @@ attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
; CHECK: [[LOOP35]] = distinct !{[[LOOP35]], [[META2]], [[META1]]}
; CHECK: [[LOOP36]] = distinct !{[[LOOP36]], [[META1]], [[META2]]}
; CHECK: [[LOOP37]] = distinct !{[[LOOP37]], [[META2]], [[META1]]}
-; CHECK: [[LOOP38]] = distinct !{[[LOOP38]], [[META1]], [[META2]]}
-; CHECK: [[LOOP39]] = distinct !{[[LOOP39]], [[META2]], [[META1]]}
;.
More information about the llvm-commits
mailing list