[llvm] [Analysis] Teach isDereferenceableAndAlignedInLoop about SCEV predicates (PR #106562)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 17 01:56:08 PDT 2024
https://github.com/david-arm updated https://github.com/llvm/llvm-project/pull/106562
>From 29b18465a000f044da276b613477c10a5aa0a8ad Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Thu, 29 Aug 2024 14:40:47 +0000
Subject: [PATCH 1/5] [LoopVectorize] Add test for dereferenceable loop with
SCEV predicate
---
.../LoopVectorize/load-deref-pred-align.ll | 126 ++++++++++++++++++
1 file changed, 126 insertions(+)
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
index a7c9a18127ade5..58eaef16275c0e 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
@@ -296,3 +296,129 @@ latch:
loop_exit:
ret i8 %accum.next
}
+
+
+define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
+; CHECK-LABEL: @loop_requires_scev_predicate(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT: call void @init(ptr [[P1]])
+; CHECK-NEXT: call void @init(ptr [[P2]])
+; CHECK-NEXT: [[END_CLAMPED:%.*]] = and i32 [[END:%.*]], 1023
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[END]] to i10
+; CHECK-NEXT: [[TMP1:%.*]] = zext i10 [[TMP0]] to i64
+; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1)
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK: vector.scevcheck:
+; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END_CLAMPED]], i32 1)
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[UMAX]], -1
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
+; CHECK-NEXT: [[TMP4:%.*]] = add i8 1, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP2]], 255
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
+; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i8
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
+; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK: pred.store.if:
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DEST:%.*]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], [[TMP14]]
+; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
+; CHECK: pred.store.continue:
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
+; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
+; CHECK: pred.store.if3:
+; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
+; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], [[TMP21]]
+; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP22]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
+; CHECK: pred.store.continue4:
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[GEP_IND]]
+; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[DOWORK:%.*]] = icmp ne i32 [[TMP26]], 0
+; CHECK-NEXT: br i1 [[DOWORK]], label [[FOR_DOWORK:%.*]], label [[FOR_INC]]
+; CHECK: for.dowork:
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[GEP_IND]]
+; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[GEP_IND]]
+; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[IND_NEXT]] = add i8 [[IND]], 1
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32
+; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %p1 = alloca [1024 x i32]
+ %p2 = alloca [1024 x i32]
+ call void @init(ptr %p1)
+ call void @init(ptr %p2)
+ %end.clamped = and i32 %end, 1023
+ br label %for.body
+
+for.body:
+ %ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ]
+ %gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %gep.ind
+ %0 = load i32, ptr %arrayidx, align 4
+ %dowork = icmp ne i32 %0, 0
+ br i1 %dowork, label %for.dowork, label %for.inc
+
+for.dowork:
+ %arrayidx3 = getelementptr inbounds i32, ptr %p2, i64 %gep.ind
+ %1 = load i32, ptr %arrayidx3, align 4
+ %add = add i32 %0, %1
+ %arrayidx5 = getelementptr inbounds i32, ptr %dest, i64 %gep.ind
+ store i32 %add, ptr %arrayidx5, align 4
+ br label %for.inc
+
+for.inc:
+ %ind.next = add i8 %ind, 1
+ %conv = zext i8 %ind.next to i32
+ %gep.ind.next = add i64 %gep.ind, 1
+ %cmp = icmp ult i32 %conv, %end.clamped
+ br i1 %cmp, label %for.body, label %exit
+
+exit:
+ ret i32 0
+}
>From cdabbafbd6997344ba8cc2956d6d83852b8536db Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Thu, 29 Aug 2024 14:40:59 +0000
Subject: [PATCH 2/5] [Analysis] Teach isDereferenceableAndAlignedInLoop about
SCEV predicates
Currently, if a loop contains loads that we can prove at compile time
are dereferenceable when certain conditions are satisfied, the function
isDereferenceableAndAlignedInLoop will still return false, because
getSmallConstantMaxTripCount returns 0 whenever SCEV predicates are
required. This patch changes getSmallConstantMaxTripCount to take an
optional Predicates pointer argument, so that callers such as
isDereferenceableAndAlignedInLoop can consider more cases.
---
llvm/include/llvm/Analysis/Loads.h | 14 ++++---
llvm/include/llvm/Analysis/ScalarEvolution.h | 20 ++++++++--
llvm/lib/Analysis/Loads.cpp | 17 ++++----
llvm/lib/Analysis/ScalarEvolution.cpp | 34 +++++++++++-----
.../Vectorize/LoopVectorizationLegality.cpp | 8 +++-
.../LoopVectorize/load-deref-pred-align.ll | 39 ++++++++++---------
6 files changed, 84 insertions(+), 48 deletions(-)
diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 1f01ff7027fa9a..639070c07897b0 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -27,6 +27,8 @@ class LoadInst;
class Loop;
class MemoryLocation;
class ScalarEvolution;
+class SCEVPredicate;
+template <typename T> class SmallVectorImpl;
class TargetLibraryInfo;
/// Return true if this is always a dereferenceable pointer. If the context
@@ -81,14 +83,16 @@ bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size,
/// that required by the header itself and could be hoisted into the header
/// if desired.) This is more powerful than the variants above when the
/// address loaded from is analyzeable by SCEV.
-bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
- ScalarEvolution &SE, DominatorTree &DT,
- AssumptionCache *AC = nullptr);
+bool isDereferenceableAndAlignedInLoop(
+ LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT,
+ AssumptionCache *AC = nullptr,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
/// Return true if the loop \p L cannot fault on any iteration and only
/// contains read-only memory accesses.
-bool isDereferenceableReadOnlyLoop(Loop *L, ScalarEvolution *SE,
- DominatorTree *DT, AssumptionCache *AC);
+bool isDereferenceableReadOnlyLoop(
+ Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
/// Return true if we know that executing a load from this value cannot trap.
///
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 89f9395959779d..2aaed8f992230a 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -823,8 +823,11 @@ class ScalarEvolution {
/// Returns the upper bound of the loop trip count as a normal unsigned
/// value.
- /// Returns 0 if the trip count is unknown or not constant.
- unsigned getSmallConstantMaxTripCount(const Loop *L);
+ /// Returns 0 if the trip count is unknown, not constant or requires
+ /// SCEV predicates and \p Predicates is nullptr.
+ unsigned getSmallConstantMaxTripCount(
+ const Loop *L,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
/// Returns the largest constant divisor of the trip count as a normal
/// unsigned value, if possible. This means that the actual trip count is
@@ -905,6 +908,13 @@ class ScalarEvolution {
return getBackedgeTakenCount(L, ConstantMaximum);
}
+ /// Similar to getConstantMaxBackedgeTakenCount, except it will add a set of
+ /// SCEV predicates to Predicates that are required to be true in order for
+ /// the answer to be correct. Predicates can be checked with run-time
+ /// checks and can be used to perform loop versioning.
+ const SCEV *getPredicatedConstantMaxBackedgeTakenCount(
+ const Loop *L, SmallVectorImpl<const SCEVPredicate *> &Predicates);
+
/// When successful, this returns a SCEV that is greater than or equal
/// to (i.e. a "conservative over-approximation") of the value returend by
/// getBackedgeTakenCount. If such a value cannot be computed, it returns the
@@ -1507,7 +1517,7 @@ class ScalarEvolution {
/// Expression indicating the least constant maximum backedge-taken count of
/// the loop that is known, or a SCEVCouldNotCompute. This expression is
- /// only valid if the redicates associated with all loop exits are true.
+ /// only valid if the predicates associated with all loop exits are true.
const SCEV *ConstantMax = nullptr;
/// Indicating if \c ExitNotTaken has an element for every exiting block in
@@ -1586,7 +1596,9 @@ class ScalarEvolution {
}
/// Get the constant max backedge taken count for the loop.
- const SCEV *getConstantMax(ScalarEvolution *SE) const;
+ const SCEV *getConstantMax(
+ ScalarEvolution *SE,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr) const;
/// Get the constant max backedge taken count for the particular loop exit.
const SCEV *getConstantMax(
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index a88469ab81a8c8..40f8153aeecb43 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -259,10 +259,9 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
return false;
}
-bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
- ScalarEvolution &SE,
- DominatorTree &DT,
- AssumptionCache *AC) {
+bool llvm::isDereferenceableAndAlignedInLoop(
+ LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT,
+ AssumptionCache *AC, SmallVectorImpl<const SCEVPredicate *> *Predicates) {
auto &DL = LI->getDataLayout();
Value *Ptr = LI->getPointerOperand();
@@ -287,7 +286,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
if (!Step)
return false;
- auto TC = SE.getSmallConstantMaxTripCount(L);
+ auto TC = SE.getSmallConstantMaxTripCount(L, Predicates);
if (!TC)
return false;
@@ -793,13 +792,13 @@ bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To,
return isPointerAlwaysReplaceable(From, To, DL);
}
-bool llvm::isDereferenceableReadOnlyLoop(Loop *L, ScalarEvolution *SE,
- DominatorTree *DT,
- AssumptionCache *AC) {
+bool llvm::isDereferenceableReadOnlyLoop(
+ Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates) {
for (BasicBlock *BB : L->blocks()) {
for (Instruction &I : *BB) {
if (auto *LI = dyn_cast<LoadInst>(&I)) {
- if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC))
+ if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates))
return false;
} else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
return false;
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 57e03f667ba6ff..4c660305ae317c 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -8191,10 +8191,13 @@ ScalarEvolution::getSmallConstantTripCount(const Loop *L,
return getConstantTripCount(ExitCount);
}
-unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
+unsigned ScalarEvolution::getSmallConstantMaxTripCount(
+ const Loop *L, SmallVectorImpl<const SCEVPredicate *> *Predicates) {
+
const auto *MaxExitCount =
- dyn_cast<SCEVConstant>(getConstantMaxBackedgeTakenCount(L));
- return getConstantTripCount(MaxExitCount);
+ Predicates ? getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
+ : getConstantMaxBackedgeTakenCount(L);
+ return getConstantTripCount(dyn_cast<SCEVConstant>(MaxExitCount));
}
unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
@@ -8303,6 +8306,11 @@ const SCEV *ScalarEvolution::getPredicatedSymbolicMaxBackedgeTakenCount(
return getPredicatedBackedgeTakenInfo(L).getSymbolicMax(L, this, &Preds);
}
+const SCEV *ScalarEvolution::getPredicatedConstantMaxBackedgeTakenCount(
+ const Loop *L, SmallVectorImpl<const SCEVPredicate *> &Preds) {
+ return getPredicatedBackedgeTakenInfo(L).getConstantMax(this, &Preds);
+}
+
bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) {
return getBackedgeTakenInfo(L).isConstantMaxOrZero(this);
}
@@ -8626,15 +8634,21 @@ ScalarEvolution::BackedgeTakenInfo::getExitNotTaken(
}
/// getConstantMax - Get the constant max backedge taken count for the loop.
-const SCEV *
-ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const {
- auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
- return !ENT.hasAlwaysTruePredicate();
- };
-
- if (!getConstantMax() || any_of(ExitNotTaken, PredicateNotAlwaysTrue))
+const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax(
+ ScalarEvolution *SE,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates) const {
+ if (!getConstantMax())
return SE->getCouldNotCompute();
+ for (const auto &ENT : ExitNotTaken)
+ if (!ENT.hasAlwaysTruePredicate())
+ if (!Predicates)
+ return SE->getCouldNotCompute();
+ else {
+ for (const auto *P : ENT.Predicates)
+ Predicates->push_back(P);
+ }
+
assert((isa<SCEVCouldNotCompute>(getConstantMax()) ||
isa<SCEVConstant>(getConstantMax())) &&
"No point in having a non-constant max backedge taken count!");
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 7062e21383a5fc..d0163735492b73 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1334,11 +1334,17 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
// we restrict this to loads; stores are more complicated due to
// concurrency restrictions.
ScalarEvolution &SE = *PSE.getSE();
+ SmallVector<const SCEVPredicate *, 4> Predicates;
for (Instruction &I : *BB) {
LoadInst *LI = dyn_cast<LoadInst>(&I);
+ // Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so
+ // that it will consider loops that need guarding by SCEV checks. The
+ // vectoriser will generate these checks if we decide to vectorise.
if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
- isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC))
+ isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC,
+ &Predicates))
SafePointers.insert(LI->getPointerOperand());
+ Predicates.clear();
}
}
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
index 58eaef16275c0e..1ef01e3b793d5b 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
@@ -326,35 +326,36 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i8
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE5:%.*]] ]
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
-; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x i32>, ptr [[TMP13]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
+; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[TMP8]]
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DEST:%.*]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
-; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], [[TMP14]]
-; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP15]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
-; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
-; CHECK: pred.store.if3:
-; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[TMP19]]
-; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[TMP19]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
-; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], [[TMP21]]
-; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP22]], align 4
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
-; CHECK: pred.store.continue4:
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
+; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5]]
+; CHECK: pred.store.if4:
+; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 1
+; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]]
+; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
+; CHECK: pred.store.continue5:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
>From f12b18bb6f6ca87cc7e86c8302120d18cd70465c Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Fri, 30 Aug 2024 15:36:23 +0000
Subject: [PATCH 3/5] Add braces around if statement
---
llvm/lib/Analysis/ScalarEvolution.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 4c660305ae317c..8643c953aca582 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -8641,13 +8641,14 @@ const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax(
return SE->getCouldNotCompute();
for (const auto &ENT : ExitNotTaken)
- if (!ENT.hasAlwaysTruePredicate())
+ if (!ENT.hasAlwaysTruePredicate()) {
if (!Predicates)
return SE->getCouldNotCompute();
else {
for (const auto *P : ENT.Predicates)
Predicates->push_back(P);
}
+ }
assert((isa<SCEVCouldNotCompute>(getConstantMax()) ||
isa<SCEVConstant>(getConstantMax())) &&
>From 99d1a56ebd67095005571420e261fb38dc507170 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Mon, 16 Sep 2024 15:46:13 +0000
Subject: [PATCH 4/5] Address review comment
* Minor code refactor in BackedgeTakenInfo::getConstantMax.
---
llvm/lib/Analysis/ScalarEvolution.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 8643c953aca582..7d97cccc42e1db 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -8644,10 +8644,8 @@ const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax(
if (!ENT.hasAlwaysTruePredicate()) {
if (!Predicates)
return SE->getCouldNotCompute();
- else {
- for (const auto *P : ENT.Predicates)
- Predicates->push_back(P);
- }
+ else
+ Predicates->append(ENT.Predicates.begin(), ENT.Predicates.end());
}
assert((isa<SCEVCouldNotCompute>(getConstantMax()) ||
>From 5006d64c579fd009529c0222fdbd70494f443d9b Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Tue, 17 Sep 2024 08:55:14 +0000
Subject: [PATCH 5/5] Address review comments
* Remove else statement.
* Use append_range when appending predicates.
* Print out getPredicatedConstantMaxBackedgeTakenCount if it differs
from getConstantMaxBackedgeTakenCount or requires predicates.
---
llvm/lib/Analysis/ScalarEvolution.cpp | 21 +++++++++++++++++--
.../ScalarEvolution/exit-count-non-strict.ll | 6 ++++++
.../ScalarEvolution/finite-trip-count.ll | 6 ++++++
.../Analysis/ScalarEvolution/ne-overflow.ll | 3 +++
.../ScalarEvolution/predicated-exit-count.ll | 4 ++++
...cated-symbolic-max-backedge-taken-count.ll | 6 ++++++
.../trip-count-implied-addrec.ll | 15 +++++++++++++
7 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 7d97cccc42e1db..430a5aed9dc4f9 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -8644,8 +8644,7 @@ const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax(
if (!ENT.hasAlwaysTruePredicate()) {
if (!Predicates)
return SE->getCouldNotCompute();
- else
- Predicates->append(ENT.Predicates.begin(), ENT.Predicates.end());
+ append_range(*Predicates, ENT.Predicates);
}
assert((isa<SCEVCouldNotCompute>(getConstantMax()) ||
@@ -13764,8 +13763,26 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
for (const auto *P : Preds)
P->print(OS, 4);
}
+ Preds.clear();
+ auto *PredConstantMax =
+ SE->getPredicatedConstantMaxBackedgeTakenCount(L, Preds);
+ if (PredConstantMax != ConstantBTC || !Preds.empty()) {
+ OS << "Loop ";
+ L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+ OS << ": ";
+ if (!isa<SCEVCouldNotCompute>(PredConstantMax)) {
+ OS << "Predicated constant max backedge-taken count is ";
+ PrintSCEVWithTypeHint(OS, PredConstantMax);
+ } else
+ OS << "Unpredictable predicated constant max backedge-taken count.";
+ OS << "\n";
+ OS << " Predicates:\n";
+ for (const auto *P : Preds)
+ P->print(OS, 4);
+ }
Preds.clear();
+
auto *PredSymbolicMax =
SE->getPredicatedSymbolicMaxBackedgeTakenCount(L, Preds);
if (SymbolicBTC != PredSymbolicMax) {
diff --git a/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll b/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll
index 6d64f76494638f..f7a18c77a82c8f 100644
--- a/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll
+++ b/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll
@@ -109,6 +109,9 @@ define void @ule_from_zero_no_nuw(i32 %M, i32 %N) {
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw>
+; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4294967295
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw>
; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw>
@@ -238,6 +241,9 @@ define void @sle_from_int_min_no_nsw(i32 %M, i32 %N) {
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))<nsw>)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: <nssw>
+; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4294967295
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: <nssw>
; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))<nsw>)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: <nssw>
diff --git a/llvm/test/Analysis/ScalarEvolution/finite-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/finite-trip-count.ll
index 471954f44311d4..a1538fd78ba17d 100644
--- a/llvm/test/Analysis/ScalarEvolution/finite-trip-count.ll
+++ b/llvm/test/Analysis/ScalarEvolution/finite-trip-count.ll
@@ -59,6 +59,9 @@ define void @sle_pre_inc_infinite(i32 %len) {
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (0 smax (1 + (sext i32 %len to i64))<nsw>)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nssw>
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i64 2147483648
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nssw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (0 smax (1 + (sext i32 %len to i64))<nsw>)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nssw>
@@ -130,6 +133,9 @@ define void @ule_pre_inc_infinite(i32 %len) {
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (1 + (zext i32 %len to i64))<nuw><nsw>
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i64 4294967296
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (1 + (zext i32 %len to i64))<nuw><nsw>
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
diff --git a/llvm/test/Analysis/ScalarEvolution/ne-overflow.ll b/llvm/test/Analysis/ScalarEvolution/ne-overflow.ll
index 49288c85897fd9..3022281658a75f 100644
--- a/llvm/test/Analysis/ScalarEvolution/ne-overflow.ll
+++ b/llvm/test/Analysis/ScalarEvolution/ne-overflow.ll
@@ -240,6 +240,9 @@ define void @test_zext(i64 %N) mustprogress {
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (%N /u 2)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,2}<nuw><%for.body> Added Flags: <nusw>
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i64 9223372036854775807
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {0,+,2}<nuw><%for.body> Added Flags: <nusw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (%N /u 2)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,2}<nuw><%for.body> Added Flags: <nusw>
diff --git a/llvm/test/Analysis/ScalarEvolution/predicated-exit-count.ll b/llvm/test/Analysis/ScalarEvolution/predicated-exit-count.ll
index de214183710ab3..3b398d422e36a3 100644
--- a/llvm/test/Analysis/ScalarEvolution/predicated-exit-count.ll
+++ b/llvm/test/Analysis/ScalarEvolution/predicated-exit-count.ll
@@ -30,6 +30,10 @@ define i32 @multiple_exits_with_predicates(ptr %src1, ptr readonly %src2, i32 %e
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
; CHECK-EMPTY:
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i32 1023
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
+; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (1023 umin (-1 + (1 umax %end)))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
diff --git a/llvm/test/Analysis/ScalarEvolution/predicated-symbolic-max-backedge-taken-count.ll b/llvm/test/Analysis/ScalarEvolution/predicated-symbolic-max-backedge-taken-count.ll
index 2ec6158e9b0920..ee6052685b43b5 100644
--- a/llvm/test/Analysis/ScalarEvolution/predicated-symbolic-max-backedge-taken-count.ll
+++ b/llvm/test/Analysis/ScalarEvolution/predicated-symbolic-max-backedge-taken-count.ll
@@ -20,6 +20,9 @@ define void @test1(i64 %x, ptr %a, ptr %b) {
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<%header> Added Flags: <nusw>
; CHECK-EMPTY:
+; CHECK-NEXT: Loop %header: Predicated constant max backedge-taken count is i64 -2
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {1,+,1}<%header> Added Flags: <nusw>
; CHECK-NEXT: Loop %header: Predicated symbolic max backedge-taken count is (-1 + (1 umax %x))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<%header> Added Flags: <nusw>
@@ -71,6 +74,9 @@ define void @test2(i64 %x, ptr %a) {
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<%header> Added Flags: <nusw>
; CHECK-EMPTY:
+; CHECK-NEXT: Loop %header: Predicated constant max backedge-taken count is i64 -2
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {1,+,1}<%header> Added Flags: <nusw>
; CHECK-NEXT: Loop %header: Predicated symbolic max backedge-taken count is (-1 + (1 umax %x))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<%header> Added Flags: <nusw>
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll
index b313842ad5e1a9..2ee2ec53f6c9e9 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll
@@ -61,6 +61,9 @@ define void @nw_implies_nsw(i16 %n) mustprogress {
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (128 + (-128 smax %n))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {-128,+,1}<%for.body> Added Flags: <nssw>
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i16 -32641
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {-128,+,1}<%for.body> Added Flags: <nssw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (128 + (-128 smax %n))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {-128,+,1}<%for.body> Added Flags: <nssw>
@@ -110,6 +113,9 @@ define void @actually_infinite() {
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is i16 257
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i16 257
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is i16 257
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
@@ -138,6 +144,9 @@ define void @rhs_mustexit_1(i16 %n.raw) mustprogress {
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16))<nsw>))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<nw><%for.body> Added Flags: <nusw>
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i16 -2
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {1,+,1}<nw><%for.body> Added Flags: <nusw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16))<nsw>))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<nw><%for.body> Added Flags: <nusw>
@@ -266,6 +275,9 @@ define void @neg_rhs_maybe_infinite(i16 %n.raw) {
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16))<nsw>))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i16 -2
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is (-1 + (1 umax (-1 + (zext i8 (trunc i16 %n.raw to i8) to i16))<nsw>))
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
@@ -391,6 +403,9 @@ define void @ult_constant_rhs_stride2_neg(i16 %n.raw, i8 %start) {
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((256 + (-1 * (zext i8 (2 + %start) to i16))<nsw>)<nsw> /u 2)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {(2 + %start),+,2}<%for.body> Added Flags: <nusw>
+; CHECK-NEXT: Loop %for.body: Predicated constant max backedge-taken count is i16 128
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {(2 + %start),+,2}<%for.body> Added Flags: <nusw>
; CHECK-NEXT: Loop %for.body: Predicated symbolic max backedge-taken count is ((256 + (-1 * (zext i8 (2 + %start) to i16))<nsw>)<nsw> /u 2)
; CHECK-NEXT: Predicates:
; CHECK-NEXT: {(2 + %start),+,2}<%for.body> Added Flags: <nusw>
More information about the llvm-commits mailing list