[llvm] [Analysis] Teach isDereferenceableAndAlignedInLoop about SCEV predicates (PR #106562)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 30 08:37:04 PDT 2024
https://github.com/david-arm updated https://github.com/llvm/llvm-project/pull/106562
>From 6610d5f62dbb63f89103250bdfcb3d382bc0fd84 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Thu, 29 Aug 2024 14:40:47 +0000
Subject: [PATCH 1/3] [LoopVectorize] Add test for dereferenceable loop with
SCEV predicate
---
.../LoopVectorize/load-deref-pred-align.ll | 126 ++++++++++++++++++
1 file changed, 126 insertions(+)
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
index a7c9a18127ade5..58eaef16275c0e 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
@@ -296,3 +296,129 @@ latch:
loop_exit:
ret i8 %accum.next
}
+
+
+define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
+; CHECK-LABEL: @loop_requires_scev_predicate(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT: call void @init(ptr [[P1]])
+; CHECK-NEXT: call void @init(ptr [[P2]])
+; CHECK-NEXT: [[END_CLAMPED:%.*]] = and i32 [[END:%.*]], 1023
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[END]] to i10
+; CHECK-NEXT: [[TMP1:%.*]] = zext i10 [[TMP0]] to i64
+; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1)
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK: vector.scevcheck:
+; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END_CLAMPED]], i32 1)
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[UMAX]], -1
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
+; CHECK-NEXT: [[TMP4:%.*]] = add i8 1, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i32 [[TMP2]], 255
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
+; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i8
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
+; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK: pred.store.if:
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DEST:%.*]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], [[TMP14]]
+; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
+; CHECK: pred.store.continue:
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
+; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
+; CHECK: pred.store.if3:
+; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
+; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], [[TMP21]]
+; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP22]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
+; CHECK: pred.store.continue4:
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[GEP_IND]]
+; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[DOWORK:%.*]] = icmp ne i32 [[TMP26]], 0
+; CHECK-NEXT: br i1 [[DOWORK]], label [[FOR_DOWORK:%.*]], label [[FOR_INC]]
+; CHECK: for.dowork:
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[GEP_IND]]
+; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[GEP_IND]]
+; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[IND_NEXT]] = add i8 [[IND]], 1
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32
+; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %p1 = alloca [1024 x i32]
+ %p2 = alloca [1024 x i32]
+ call void @init(ptr %p1)
+ call void @init(ptr %p2)
+ %end.clamped = and i32 %end, 1023
+ br label %for.body
+
+for.body:
+ %ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ]
+ %gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %gep.ind
+ %0 = load i32, ptr %arrayidx, align 4
+ %dowork = icmp ne i32 %0, 0
+ br i1 %dowork, label %for.dowork, label %for.inc
+
+for.dowork:
+ %arrayidx3 = getelementptr inbounds i32, ptr %p2, i64 %gep.ind
+ %1 = load i32, ptr %arrayidx3, align 4
+ %add = add i32 %0, %1
+ %arrayidx5 = getelementptr inbounds i32, ptr %dest, i64 %gep.ind
+ store i32 %add, ptr %arrayidx5, align 4
+ br label %for.inc
+
+for.inc:
+ %ind.next = add i8 %ind, 1
+ %conv = zext i8 %ind.next to i32
+ %gep.ind.next = add i64 %gep.ind, 1
+ %cmp = icmp ult i32 %conv, %end.clamped
+ br i1 %cmp, label %for.body, label %exit
+
+exit:
+ ret i32 0
+}
>From 42eaf089ddde542bb31f6f80100503b094e36407 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Thu, 29 Aug 2024 14:40:59 +0000
Subject: [PATCH 2/3] [Analysis] Teach isDereferenceableAndAlignedInLoop about
SCEV predicates
Currently if a loop contains loads that we can prove at compile time
are dereferenceable when certain conditions are satisfied the function
isDereferenceableAndAlignedInLoop will still return false because
getSmallConstantMaxTripCount will return 0 when SCEV predicates
are required. This patch changes getSmallConstantMaxTripCount to take
an optional Predicates pointer argument so that we can permit
functions such as isDereferenceableAndAlignedInLoop to consider more
cases.
---
llvm/include/llvm/Analysis/Loads.h | 14 ++++---
llvm/include/llvm/Analysis/ScalarEvolution.h | 20 ++++++++--
llvm/lib/Analysis/Loads.cpp | 17 ++++----
llvm/lib/Analysis/ScalarEvolution.cpp | 34 +++++++++++-----
.../Vectorize/LoopVectorizationLegality.cpp | 8 +++-
.../LoopVectorize/load-deref-pred-align.ll | 39 ++++++++++---------
6 files changed, 84 insertions(+), 48 deletions(-)
diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 1f01ff7027fa9a..639070c07897b0 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -27,6 +27,8 @@ class LoadInst;
class Loop;
class MemoryLocation;
class ScalarEvolution;
+class SCEVPredicate;
+template <typename T> class SmallVectorImpl;
class TargetLibraryInfo;
/// Return true if this is always a dereferenceable pointer. If the context
@@ -81,14 +83,16 @@ bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size,
/// that required by the header itself and could be hoisted into the header
/// if desired.) This is more powerful than the variants above when the
/// address loaded from is analyzeable by SCEV.
-bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
- ScalarEvolution &SE, DominatorTree &DT,
- AssumptionCache *AC = nullptr);
+bool isDereferenceableAndAlignedInLoop(
+ LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT,
+ AssumptionCache *AC = nullptr,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
/// Return true if the loop \p L cannot fault on any iteration and only
/// contains read-only memory accesses.
-bool isDereferenceableReadOnlyLoop(Loop *L, ScalarEvolution *SE,
- DominatorTree *DT, AssumptionCache *AC);
+bool isDereferenceableReadOnlyLoop(
+ Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
/// Return true if we know that executing a load from this value cannot trap.
///
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index fe46a504bce5d1..741eff57b2a3d0 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -823,8 +823,11 @@ class ScalarEvolution {
/// Returns the upper bound of the loop trip count as a normal unsigned
/// value.
- /// Returns 0 if the trip count is unknown or not constant.
- unsigned getSmallConstantMaxTripCount(const Loop *L);
+ /// Returns 0 if the trip count is unknown, not constant or requires
+ /// SCEV predicates and \p Predicates is nullptr.
+ unsigned getSmallConstantMaxTripCount(
+ const Loop *L,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
/// Returns the largest constant divisor of the trip count as a normal
/// unsigned value, if possible. This means that the actual trip count is
@@ -898,6 +901,13 @@ class ScalarEvolution {
return getBackedgeTakenCount(L, ConstantMaximum);
}
+ /// Similar to getConstantMaxBackedgeTakenCount, except it will add a set of
+ /// SCEV predicates to Predicates that are required to be true in order for
+ /// the answer to be correct. Predicates can be checked with run-time
+ /// checks and can be used to perform loop versioning.
+ const SCEV *getPredicatedConstantMaxBackedgeTakenCount(
+ const Loop *L, SmallVectorImpl<const SCEVPredicate *> &Predicates);
+
/// When successful, this returns a SCEV that is greater than or equal
/// to (i.e. a "conservative over-approximation") of the value returend by
/// getBackedgeTakenCount. If such a value cannot be computed, it returns the
@@ -1500,7 +1510,7 @@ class ScalarEvolution {
/// Expression indicating the least constant maximum backedge-taken count of
/// the loop that is known, or a SCEVCouldNotCompute. This expression is
- /// only valid if the redicates associated with all loop exits are true.
+ /// only valid if the predicates associated with all loop exits are true.
const SCEV *ConstantMax = nullptr;
/// Indicating if \c ExitNotTaken has an element for every exiting block in
@@ -1568,7 +1578,9 @@ class ScalarEvolution {
ScalarEvolution *SE) const;
/// Get the constant max backedge taken count for the loop.
- const SCEV *getConstantMax(ScalarEvolution *SE) const;
+ const SCEV *getConstantMax(
+ ScalarEvolution *SE,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr) const;
/// Get the constant max backedge taken count for the particular loop exit.
const SCEV *getConstantMax(const BasicBlock *ExitingBlock,
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index a88469ab81a8c8..40f8153aeecb43 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -259,10 +259,9 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
return false;
}
-bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
- ScalarEvolution &SE,
- DominatorTree &DT,
- AssumptionCache *AC) {
+bool llvm::isDereferenceableAndAlignedInLoop(
+ LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT,
+ AssumptionCache *AC, SmallVectorImpl<const SCEVPredicate *> *Predicates) {
auto &DL = LI->getDataLayout();
Value *Ptr = LI->getPointerOperand();
@@ -287,7 +286,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
if (!Step)
return false;
- auto TC = SE.getSmallConstantMaxTripCount(L);
+ auto TC = SE.getSmallConstantMaxTripCount(L, Predicates);
if (!TC)
return false;
@@ -793,13 +792,13 @@ bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To,
return isPointerAlwaysReplaceable(From, To, DL);
}
-bool llvm::isDereferenceableReadOnlyLoop(Loop *L, ScalarEvolution *SE,
- DominatorTree *DT,
- AssumptionCache *AC) {
+bool llvm::isDereferenceableReadOnlyLoop(
+ Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates) {
for (BasicBlock *BB : L->blocks()) {
for (Instruction &I : *BB) {
if (auto *LI = dyn_cast<LoadInst>(&I)) {
- if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC))
+ if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates))
return false;
} else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
return false;
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 54dde8401cdff0..12504c91144a37 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -8175,10 +8175,13 @@ ScalarEvolution::getSmallConstantTripCount(const Loop *L,
return getConstantTripCount(ExitCount);
}
-unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
+unsigned ScalarEvolution::getSmallConstantMaxTripCount(
+ const Loop *L, SmallVectorImpl<const SCEVPredicate *> *Predicates) {
+
const auto *MaxExitCount =
- dyn_cast<SCEVConstant>(getConstantMaxBackedgeTakenCount(L));
- return getConstantTripCount(MaxExitCount);
+ Predicates ? getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
+ : getConstantMaxBackedgeTakenCount(L);
+ return getConstantTripCount(dyn_cast<SCEVConstant>(MaxExitCount));
}
unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
@@ -8270,6 +8273,11 @@ const SCEV *ScalarEvolution::getPredicatedSymbolicMaxBackedgeTakenCount(
return getPredicatedBackedgeTakenInfo(L).getSymbolicMax(L, this, &Preds);
}
+const SCEV *ScalarEvolution::getPredicatedConstantMaxBackedgeTakenCount(
+ const Loop *L, SmallVectorImpl<const SCEVPredicate *> &Preds) {
+ return getPredicatedBackedgeTakenInfo(L).getConstantMax(this, &Preds);
+}
+
bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) {
return getBackedgeTakenInfo(L).isConstantMaxOrZero(this);
}
@@ -8604,15 +8612,21 @@ const SCEV *ScalarEvolution::BackedgeTakenInfo::getSymbolicMax(
}
/// getConstantMax - Get the constant max backedge taken count for the loop.
-const SCEV *
-ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const {
- auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
- return !ENT.hasAlwaysTruePredicate();
- };
-
- if (!getConstantMax() || any_of(ExitNotTaken, PredicateNotAlwaysTrue))
+const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax(
+ ScalarEvolution *SE,
+ SmallVectorImpl<const SCEVPredicate *> *Predicates) const {
+ if (!getConstantMax())
return SE->getCouldNotCompute();
+ for (const auto &ENT : ExitNotTaken)
+ if (!ENT.hasAlwaysTruePredicate())
+ if (!Predicates)
+ return SE->getCouldNotCompute();
+ else {
+ for (const auto *P : ENT.Predicates)
+ Predicates->push_back(P);
+ }
+
assert((isa<SCEVCouldNotCompute>(getConstantMax()) ||
isa<SCEVConstant>(getConstantMax())) &&
"No point in having a non-constant max backedge taken count!");
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 66a779da8c25bc..183ea890b84e55 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1331,11 +1331,17 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
// we restrict this to loads; stores are more complicated due to
// concurrency restrictions.
ScalarEvolution &SE = *PSE.getSE();
+ SmallVector<const SCEVPredicate *, 4> Predicates;
for (Instruction &I : *BB) {
LoadInst *LI = dyn_cast<LoadInst>(&I);
+ // Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so
+ // that it will consider loops that need guarding by SCEV checks. The
+ // vectoriser will generate these checks if we decide to vectorise.
if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
- isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC))
+ isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC,
+ &Predicates))
SafePointers.insert(LI->getPointerOperand());
+ Predicates.clear();
}
}
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
index 58eaef16275c0e..1ef01e3b793d5b 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll
@@ -326,35 +326,36 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) {
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i8
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE5:%.*]] ]
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
-; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x i32>, ptr [[TMP13]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
+; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[TMP8]]
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DEST:%.*]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
-; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], [[TMP14]]
-; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP15]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
-; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
-; CHECK: pred.store.if3:
-; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[TMP19]]
-; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[TMP19]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
-; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], [[TMP21]]
-; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP22]], align 4
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
-; CHECK: pred.store.continue4:
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
+; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5]]
+; CHECK: pred.store.if4:
+; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 1
+; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]]
+; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
+; CHECK: pred.store.continue5:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
>From ae560a14747edb95e57ae810cd5b7470ccf5f105 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Fri, 30 Aug 2024 15:36:23 +0000
Subject: [PATCH 3/3] Add braces around if statement
---
llvm/lib/Analysis/ScalarEvolution.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 12504c91144a37..8d4bbb28d55fda 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -8619,13 +8619,14 @@ const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax(
return SE->getCouldNotCompute();
for (const auto &ENT : ExitNotTaken)
- if (!ENT.hasAlwaysTruePredicate())
+ if (!ENT.hasAlwaysTruePredicate()) {
if (!Predicates)
return SE->getCouldNotCompute();
else {
for (const auto *P : ENT.Predicates)
Predicates->push_back(P);
}
+ }
assert((isa<SCEVCouldNotCompute>(getConstantMax()) ||
isa<SCEVConstant>(getConstantMax())) &&
More information about the llvm-commits
mailing list