[llvm] 02ee96e - [Analysis] Teach isDereferenceableAndAlignedInLoop about SCEV predicates (#106562)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 23 01:56:40 PDT 2024
Author: David Sherwood
Date: 2024-09-23T09:56:37+01:00
New Revision: 02ee96eca90741031a26f0f06cd48bb0ba558d1a
URL: https://github.com/llvm/llvm-project/commit/02ee96eca90741031a26f0f06cd48bb0ba558d1a
DIFF: https://github.com/llvm/llvm-project/commit/02ee96eca90741031a26f0f06cd48bb0ba558d1a.diff
LOG: [Analysis] Teach isDereferenceableAndAlignedInLoop about SCEV predicates (#106562)
Currently, if a loop contains loads that we can prove at compile time
are dereferenceable when certain conditions are satisfied, the function
isDereferenceableAndAlignedInLoop will still return false because
getSmallConstantMaxTripCount will return 0 when SCEV predicates
are required. This patch changes getSmallConstantMaxTripCount to take
an optional Predicates pointer argument so that we can permit
functions such as isDereferenceableAndAlignedInLoop to consider more
cases.
Added:
Modified:
llvm/include/llvm/Analysis/Loads.h
llvm/include/llvm/Analysis/ScalarEvolution.h
llvm/lib/Analysis/Loads.cpp
llvm/lib/Analysis/ScalarEvolution.cpp
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll
llvm/test/Analysis/ScalarEvolution/finite-trip-count.ll
llvm/test/Analysis/ScalarEvolution/ne-overflow.ll
llvm/test/Analysis/ScalarEvolution/predicated-exit-count.ll
llvm/test/Analysis/ScalarEvolution/predicated-symbolic-max-backedge-taken-count.ll
llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll
llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
llvm/test/Transforms/LoopVectorize/simple_early_exit.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 1f01ff7027fa9a..639070c07897b0 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -27,6 +27,8 @@ class LoadInst;
class Loop;
class MemoryLocation;
class ScalarEvolution;
+class SCEVPredicate;
+template <typename T> class SmallVectorImpl;
class TargetLibraryInfo;
/// Return true if this is always a dereferenceable pointer. If the context
@@ -81,14 +83,16 @@ bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size,
/// that required by the header itself and could be hoisted into the header
/// if desired.) This is more powerful than the variants above when the
/// address loaded from is analyzeable by SCEV.
-bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
- ScalarEvolution &SE, DominatorTree &DT,
- AssumptionCache *AC = nullptr);
+bool isDereferenceableAndAlignedInLoop(
+ LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT,
+ AssumptionCache *AC = nullptr,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
/// Return true if the loop \p L cannot fault on any iteration and only
/// contains read-only memory accesses.
-bool isDereferenceableReadOnlyLoop(Loop *L, ScalarEvolution *SE,
- DominatorTree *DT, AssumptionCache *AC);
+bool isDereferenceableReadOnlyLoop(
+ Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
/// Return true if we know that executing a load from this value cannot trap.
///
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 44fb249d584d88..68b860725752d0 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -823,8 +823,11 @@ class ScalarEvolution {
/// Returns the upper bound of the loop trip count as a normal unsigned
/// value.
- /// Returns 0 if the trip count is unknown or not constant.
- unsigned getSmallConstantMaxTripCount(const Loop *L);
+ /// Returns 0 if the trip count is unknown, not constant or requires
+ /// SCEV predicates and \p Predicates is nullptr.
+ unsigned getSmallConstantMaxTripCount(
+ const Loop *L,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
/// Returns the largest constant divisor of the trip count as a normal
/// unsigned value, if possible. This means that the actual trip count is
@@ -905,6 +908,13 @@ class ScalarEvolution {
return getBackedgeTakenCount(L, ConstantMaximum);
}
+ /// Similar to getConstantMaxBackedgeTakenCount, except it will add a set of
+ /// SCEV predicates to Predicates that are required to be true in order for
+ /// the answer to be correct. Predicates can be checked with run-time
+ /// checks and can be used to perform loop versioning.
+ const SCEV *getPredicatedConstantMaxBackedgeTakenCount(
+ const Loop *L, SmallVectorImpl<const SCEVPredicate *> &Predicates);
+
/// When successful, this returns a SCEV that is greater than or equal
/// to (i.e. a "conservative over-approximation") of the value returend by
/// getBackedgeTakenCount. If such a value cannot be computed, it returns the
@@ -1506,7 +1516,7 @@ class ScalarEvolution {
/// Expression indicating the least constant maximum backedge-taken count of
/// the loop that is known, or a SCEVCouldNotCompute. This expression is
- /// only valid if the redicates associated with all loop exits are true.
+ /// only valid if the predicates associated with all loop exits are true.
const SCEV *ConstantMax = nullptr;
/// Indicating if \c ExitNotTaken has an element for every exiting block in
@@ -1585,7 +1595,9 @@ class ScalarEvolution {
}
/// Get the constant max backedge taken count for the loop.
- const SCEV *getConstantMax(ScalarEvolution *SE) const;
+ const SCEV *getConstantMax(
+ ScalarEvolution *SE,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr) const;
/// Get the constant max backedge taken count for the particular loop exit.
const SCEV *getConstantMax(
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 11f3807ffacf6e..f4b202791a7081 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -276,10 +276,9 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
return false;
}
-bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
- ScalarEvolution &SE,
- DominatorTree &DT,
- AssumptionCache *AC) {
+bool llvm::isDereferenceableAndAlignedInLoop(
+ LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT,
+ AssumptionCache *AC, SmallVectorImpl<const SCEVPredicate *> *Predicates) {
auto &DL = LI->getDataLayout();
Value *Ptr = LI->getPointerOperand();
@@ -304,7 +303,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
if (!Step)
return false;
- auto TC = SE.getSmallConstantMaxTripCount(L);
+ auto TC = SE.getSmallConstantMaxTripCount(L, Predicates);
if (!TC)
return false;
@@ -810,13 +809,13 @@ bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To,
return isPointerAlwaysReplaceable(From, To, DL);
}
-bool llvm::isDereferenceableReadOnlyLoop(Loop *L, ScalarEvolution *SE,
- DominatorTree *DT,
- AssumptionCache *AC) {
+bool llvm::isDereferenceableReadOnlyLoop(
+ Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates) {
for (BasicBlock *BB : L->blocks()) {
for (Instruction &I : *BB) {
if (auto *LI = dyn_cast<LoadInst>(&I)) {
- if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC))
+ if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates))
return false;
} else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
return false;
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 1d3443588ce60d..233f8edca5b13b 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -8191,10 +8191,13 @@ ScalarEvolution::getSmallConstantTripCount(const Loop *L,
return getConstantTripCount(ExitCount);
}
-unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
+unsigned ScalarEvolution::getSmallConstantMaxTripCount(
+ const Loop *L, SmallVectorImpl<const SCEVPredicate *> *Predicates) {
+
const auto *MaxExitCount =
- dyn_cast<SCEVConstant>(getConstantMaxBackedgeTakenCount(L));
- return getConstantTripCount(MaxExitCount);
+ Predicates ? getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
+ : getConstantMaxBackedgeTakenCount(L);
+ return getConstantTripCount(dyn_cast<SCEVConstant>(MaxExitCount));
}
unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
@@ -8303,6 +8306,11 @@ const SCEV *ScalarEvolution::getPredicatedSymbolicMaxBackedgeTakenCount(
return getPredicatedBackedgeTakenInfo(L).getSymbolicMax(L, this, &Preds);
}
+const SCEV *ScalarEvolution::getPredicatedConstantMaxBackedgeTakenCount(
+ const Loop *L, SmallVectorImpl<const SCEVPredicate *> &Preds) {
+ return getPredicatedBackedgeTakenInfo(L).getConstantMax(this, &Preds);
+}
+
bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) {
return getBackedgeTakenInfo(L).isConstantMaxOrZero(this);
}
@@ -8624,15 +8632,19 @@ ScalarEvolution::BackedgeTakenInfo::getExitNotTaken(
}
/// getConstantMax - Get the constant max backedge taken count for the loop.
-const SCEV *
-ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const {
- auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
- return !ENT.hasAlwaysTruePredicate();
- };
-
- if (!getConstantMax() || any_of(ExitNotTaken, PredicateNotAlwaysTrue))
+const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax(
+ ScalarEvolution *SE,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates) const {
+ if (!getConstantMax())
return SE->getCouldNotCompute();
+ for (const auto &ENT : ExitNotTaken)
+ if (!ENT.hasAlwaysTruePredicate()) {
+ if (!Predicates)
+ return SE->getCouldNotCompute();
+ append_range(*Predicates, ENT.Predicates);
+ }
+
assert((isa<SCEVCouldNotCompute>(getConstantMax()) ||
isa<SCEVConstant>(getConstantMax())) &&
"No point in having a non-constant max backedge taken count!");
@@ -13749,8 +13761,28 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
for (const auto *P : Preds)
P->print(OS, 4);
}
+ Preds.clear();
+ auto *PredConstantMax =
+ SE->getPredicatedConstantMaxBackedgeTakenCount(L, Preds);
+ if (PredConstantMax != ConstantBTC) {
+ assert(!Preds.empty() &&
+ "different predicated constant max BTC but no predicates");
+ OS << "Loop ";
+ L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+ OS << ": ";
+ if (!isa<SCEVCouldNotCompute>(PredConstantMax)) {
+ OS << "Predicated constant max backedge-taken count is ";
+ PrintSCEVWithTypeHint(OS, PredConstantMax);
+ } else
+ OS << "Unpredictable predicated constant max backedge-taken count.";
+ OS << "\n";
+ OS << " Predicates:\n";
+ for (const auto *P : Preds)
+ P->print(OS, 4);
+ }
Preds.clear();
+
auto *PredSymbolicMax =
SE->getPredicatedSymbolicMaxBackedgeTakenCount(L, Preds);
if (SymbolicBTC != PredSymbolicMax) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index a4787483813a9a..b767372a56b914 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1334,11 +1334,17 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
// we restrict this to loads; stores are more complicated due to
// concurrency restrictions.
ScalarEvolution &SE = *PSE.getSE();
+ SmallVector<const SCEVPredicate *, 4> Predicates;
for (Instruction &I : *BB) {
LoadInst *LI = dyn_cast<LoadInst>(&I);
+ // Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so
+ // that it will consider loops that need guarding by SCEV checks. The
+ // vectoriser will generate these checks if we decide to vectorise.
if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
- isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC))
+ isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC,
+ &Predicates))
SafePointers.insert(LI->getPointerOperand());
+ Predicates.clear();
}
}
@@ -1564,7 +1570,9 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
"Expected latch predecessor to be the early exiting block");
// TODO: Handle loops that may fault.
- if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC)) {
+ Predicates.clear();
+ if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
+ &Predicates)) {
reportVectorizationFailure(
"Loop may fault",
"Cannot vectorize potentially faulting early exit loop",
diff --git a/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll b/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll
index 6d64f76494638f..f7a18c77a82c8f 100644
--- a/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll
+++ b/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll
@@ -109,6 +109,9 @@ define void @ule_from_zero_no_nuw(i32 %M, i32 %N) {
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw>
+; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4294967295
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw>
; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw>
@@ -238,6 +241,9 @@ define void @sle_from_int_min_no_nsw(i32 %M, i32 %N) {
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))<nsw>)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: <nssw>
+; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4294967295
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: <nssw>
; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))<nsw>)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: <nssw>
diff --git a/llvm/test/Analysis/ScalarEvolution/finite-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/finite-trip-count.ll
index 471954f44311d4..a1538fd78ba17d 100644
--- a/llvm/test/Analysis/ScalarEvolution/finite-trip-count.ll
+++ b/llvm/test/Analysis/ScalarEvolution/finite-trip-count.ll
@@ -59,6 +59,9 @@ define void @sle_pre_inc_infinite(i32 %len) {
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (0 smax (1 + (sext i32 %len to i64))<nsw>)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nssw>
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i64 2147483648
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nssw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (0 smax (1 + (sext i32 %len to i64))<nsw>)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nssw>
@@ -130,6 +133,9 @@ define void @ule_pre_inc_infinite(i32 %len) {
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (1 + (zext i32 %len to i64))<nuw><nsw>
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i64 4294967296
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (1 + (zext i32 %len to i64))<nuw><nsw>
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
diff --git a/llvm/test/Analysis/ScalarEvolution/ne-overflow.ll b/llvm/test/Analysis/ScalarEvolution/ne-overflow.ll
index 49288c85897fd9..3022281658a75f 100644
--- a/llvm/test/Analysis/ScalarEvolution/ne-overflow.ll
+++ b/llvm/test/Analysis/ScalarEvolution/ne-overflow.ll
@@ -240,6 +240,9 @@ define void @test_zext(i64 %N) mustprogress {
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (%N /u 2)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,2}<nuw><%for.body> Added Flags: <nusw>
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i64 9223372036854775807
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {0,+,2}<nuw><%for.body> Added Flags: <nusw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (%N /u 2)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,2}<nuw><%for.body> Added Flags: <nusw>
diff --git a/llvm/test/Analysis/ScalarEvolution/predicated-exit-count.ll b/llvm/test/Analysis/ScalarEvolution/predicated-exit-count.ll
index de214183710ab3..3b398d422e36a3 100644
--- a/llvm/test/Analysis/ScalarEvolution/predicated-exit-count.ll
+++ b/llvm/test/Analysis/ScalarEvolution/predicated-exit-count.ll
@@ -30,6 +30,10 @@ define i32 @multiple_exits_with_predicates(ptr %src1, ptr readonly %src2, i32 %e
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
; CHECK-EMPTY:
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i32 1023
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
+; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (1023 umin (-1 + (1 umax %end)))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
diff --git a/llvm/test/Analysis/ScalarEvolution/predicated-symbolic-max-backedge-taken-count.ll b/llvm/test/Analysis/ScalarEvolution/predicated-symbolic-max-backedge-taken-count.ll
index 2ec6158e9b0920..ee6052685b43b5 100644
--- a/llvm/test/Analysis/ScalarEvolution/predicated-symbolic-max-backedge-taken-count.ll
+++ b/llvm/test/Analysis/ScalarEvolution/predicated-symbolic-max-backedge-taken-count.ll
@@ -20,6 +20,9 @@ define void @test1(i64 %x, ptr %a, ptr %b) {
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<%header> Added Flags: <nusw>
; CHECK-EMPTY:
+; CHECK-NEXT: Loop %header: Predicated constant max backedge-taken count is i64 -2
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {1,+,1}<%header> Added Flags: <nusw>
; CHECK-NEXT: Loop %header: Predicated symbolic max backedge-taken count is (-1 + (1 umax %x))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<%header> Added Flags: <nusw>
@@ -71,6 +74,9 @@ define void @test2(i64 %x, ptr %a) {
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<%header> Added Flags: <nusw>
; CHECK-EMPTY:
+; CHECK-NEXT: Loop %header: Predicated constant max backedge-taken count is i64 -2
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {1,+,1}<%header> Added Flags: <nusw>
; CHECK-NEXT: Loop %header: Predicated symbolic max backedge-taken count is (-1 + (1 umax %x))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<%header> Added Flags: <nusw>
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll
index b313842ad5e1a9..2ee2ec53f6c9e9 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll
@@ -61,6 +61,9 @@ define void @nw_implies_nsw(i16 %n) mustprogress {
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (128 + (-128 smax %n))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {-128,+,1}<%for.body> Added Flags: <nssw>
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i16 -32641
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {-128,+,1}<%for.body> Added Flags: <nssw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (128 + (-128 smax %n))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {-128,+,1}<%for.body> Added Flags: <nssw>
@@ -110,6 +113,9 @@ define void @actually_infinite() {
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is i16 257
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i16 257
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is i16 257
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
@@ -138,6 +144,9 @@ define void @rhs_mustexit_1(i16 %n.raw) mustprogress {
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16))<nsw>))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<nw><%for.body> Added Flags: <nusw>
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i16 -2
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {1,+,1}<nw><%for.body> Added Flags: <nusw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16))<nsw>))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<nw><%for.body> Added Flags: <nusw>
@@ -266,6 +275,9 @@ define void @neg_rhs_maybe_infinite(i16 %n.raw) {
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16))<nsw>))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i16 -2
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16))<nsw>))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
@@ -391,6 +403,9 @@ define void @ult_constant_rhs_stride2_neg(i16 %n.raw, i8 %start) {
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((256 + (-1 * (zext i8 (2 + %start) to i16))<nsw>)<nsw> /u 2)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {(2 + %start),+,2}<%for.body> Added Flags: <nusw>
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i16 128
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {(2 + %start),+,2}<%for.body> Added Flags: <nusw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is ((256 + (-1 * (zext i8 (2 + %start) to i16))<nsw>)<nsw> /u 2)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {(2 + %start),+,2}<%for.body> Added Flags: <nusw>
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
index a7c9a18127ade5..1ef01e3b793d5b 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
@@ -296,3 +296,130 @@ latch:
loop_exit:
ret i8 %accum.next
}
+
+
+define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
+; CHECK-LABEL: @loop_requires_scev_predicate(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT: call void @init(ptr [[P1]])
+; CHECK-NEXT: call void @init(ptr [[P2]])
+; CHECK-NEXT: [[END_CLAMPED:%.*]] = and i32 [[END:%.*]], 1023
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[END]] to i10
+; CHECK-NEXT: [[TMP1:%.*]] = zext i10 [[TMP0]] to i64
+; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1)
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK: vector.scevcheck:
+; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END_CLAMPED]], i32 1)
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[UMAX]], -1
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
+; CHECK-NEXT: [[TMP4:%.*]] = add i8 1, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP2]], 255
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
+; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i8
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE5:%.*]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x i32>, ptr [[TMP13]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
+; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK: pred.store.if:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DEST:%.*]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP15]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
+; CHECK: pred.store.continue:
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
+; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5]]
+; CHECK: pred.store.if4:
+; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 1
+; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]]
+; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
+; CHECK: pred.store.continue5:
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[GEP_IND]]
+; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[DOWORK:%.*]] = icmp ne i32 [[TMP26]], 0
+; CHECK-NEXT: br i1 [[DOWORK]], label [[FOR_DOWORK:%.*]], label [[FOR_INC]]
+; CHECK: for.dowork:
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[GEP_IND]]
+; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[GEP_IND]]
+; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[IND_NEXT]] = add i8 [[IND]], 1
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32
+; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %p1 = alloca [1024 x i32]
+ %p2 = alloca [1024 x i32]
+ call void @init(ptr %p1)
+ call void @init(ptr %p2)
+ %end.clamped = and i32 %end, 1023
+ br label %for.body
+
+for.body:
+ %ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ]
+ %gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %gep.ind
+ %0 = load i32, ptr %arrayidx, align 4
+ %dowork = icmp ne i32 %0, 0
+ br i1 %dowork, label %for.dowork, label %for.inc
+
+for.dowork:
+ %arrayidx3 = getelementptr inbounds i32, ptr %p2, i64 %gep.ind
+ %1 = load i32, ptr %arrayidx3, align 4
+ %add = add i32 %0, %1
+ %arrayidx5 = getelementptr inbounds i32, ptr %dest, i64 %gep.ind
+ store i32 %add, ptr %arrayidx5, align 4
+ br label %for.inc
+
+for.inc:
+ %ind.next = add i8 %ind, 1
+ %conv = zext i8 %ind.next to i32
+ %gep.ind.next = add i64 %gep.ind, 1
+ %cmp = icmp ult i32 %conv, %end.clamped
+ br i1 %cmp, label %for.body, label %exit
+
+exit:
+ ret i32 0
+}
diff --git a/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll
index dcf5c9d8ac64d1..936c07b4853a38 100644
--- a/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll
@@ -1621,12 +1621,12 @@ loop.end:
; The form of the induction variables requires SCEV predicates.
-; TODO: We should fix isDereferenceableAndAlignedInLoop and
-; getSmallConstantMaxTripCount to cope with SCEV predicates when
-; requesting the small constant max trip count.
define i32 @diff_exit_block_needs_scev_check(i32 %end) {
; DEBUG-LABEL: LV: Checking a loop in 'diff_exit_block_needs_scev_check'
-; DEBUG: LV: Not vectorizing: Loop may fault.
+; DEBUG: LV: Found an early exit. Retrying with speculative exit count.
+; DEBUG-NEXT: LV: Found speculative backedge taken count: (-1 + (1 umax (zext i10 (trunc i32 %end to i10) to i32)))<nsw>
+; DEBUG-NEXT: LV: We can vectorize this loop!
+; DEBUG-NEXT: LV: Not vectorizing: Auto-vectorization of early exit loops is not yet supported.
; CHECK-LABEL: define i32 @diff_exit_block_needs_scev_check(
; CHECK-SAME: i32 [[END:%.*]]) {
; CHECK-NEXT: entry:
More information about the llvm-commits
mailing list