[llvm] [LoopVectorize] Teach LoopVectorizationLegality about more early exits (PR #107004)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 17 07:48:24 PDT 2024
https://github.com/david-arm updated https://github.com/llvm/llvm-project/pull/107004
>From ec588109dfeb336a742ea515128d545043f40821 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Mon, 2 Sep 2024 15:33:24 +0000
Subject: [PATCH 1/3] [LoopVectorize] Teach LoopVectorizationLegality about
more early exits
This patch is split off from PR #88385 and concerns only the code
related to the legality of vectorising early exit loops. It is the
first step in adding support for vectorisation of a simple class of
loops that typically involves searching for something, i.e.
for (int i = 0; i < n; i++) {
if (p[i] == val)
return i;
}
return n;
or
for (int i = 0; i < n; i++) {
if (p1[i] != p2[i])
return i;
}
return n;
In this initial commit LoopVectorizationLegality will only consider
early exit loops legal for vectorising if they follow these criteria:
1. There are no stores in the loop.
2. The loop must have only one early exit like those shown in the above
example. I have referred to such exits as speculative early exits, to
distinguish from existing support for early exits where the
exit-not-taken count is known exactly at compile time.
3. The early exit block dominates the latch block.
4. The latch block must have an exact exit count.
5. There are no loads after the early exit block.
6. The loop must not contain reductions or recurrences. I don't see
anything fundamental blocking vectorisation of such loops, but I just
haven't done the work to support them yet.
7. We must be able to prove at compile-time that loops will not contain
faulting loads.
Tests have been added here:
Transforms/LoopVectorize/AArch64/simple_early_exit.ll
---
.../llvm/Analysis/LoopAccessAnalysis.h | 4 +-
.../Vectorize/LoopVectorizationLegality.h | 50 +-
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 7 +-
.../Vectorize/LoopVectorizationLegality.cpp | 158 +-
.../Transforms/Vectorize/LoopVectorize.cpp | 6 +
.../AArch64/simple_early_exit.ll | 1584 +++++++++++++++++
.../X86/vectorization-remarks-missed.ll | 10 +-
.../Transforms/LoopVectorize/control-flow.ll | 2 +-
.../LoopVectorize/remarks-multi-exit-loops.ll | 2 +-
9 files changed, 1800 insertions(+), 23 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index a35bc7402d1a89..8d7eb0cb3e87e8 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -661,7 +661,8 @@ class LoopAccessInfo {
bool isInvariant(Value *V) const;
unsigned getNumStores() const { return NumStores; }
- unsigned getNumLoads() const { return NumLoads;}
+ unsigned getNumLoads() const { return NumLoads; }
+ unsigned getNumCalls() const { return NumCalls; }
/// The diagnostics report generated for the analysis. E.g. why we
/// couldn't analyze the loop.
@@ -754,6 +755,7 @@ class LoopAccessInfo {
unsigned NumLoads = 0;
unsigned NumStores = 0;
+ unsigned NumCalls = 0;
/// Cache the result of analyzeLoop.
bool CanVecMem = false;
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 0f4d1355dd2bfe..bb21f97c0ae521 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -377,6 +377,24 @@ class LoopVectorizationLegality {
return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
}
+ /// Returns true if the loop has a speculative early exit, i.e. an
+ /// uncountable exit that isn't the latch block.
+ bool hasSpeculativeEarlyExit() const { return HasSpeculativeEarlyExit; }
+
+ /// Returns the speculative early exiting block.
+ BasicBlock *getSpeculativeEarlyExitingBlock() const {
+ assert(getUncountableExitingBlocks().size() == 1 &&
+ "Expected only a single uncountable exiting block");
+ return getUncountableExitingBlocks()[0];
+ }
+
+ /// Returns the destination of a speculative early exiting block.
+ BasicBlock *getSpeculativeEarlyExitBlock() const {
+ assert(getUncountableExitBlocks().size() == 1 &&
+ "Expected only a single uncountable exit block");
+ return getUncountableExitBlocks()[0];
+ }
+
/// Returns true if vector representation of the instruction \p I
/// requires mask.
bool isMaskRequired(const Instruction *I) const {
@@ -404,6 +422,22 @@ class LoopVectorizationLegality {
DominatorTree *getDominatorTree() const { return DT; }
+ /// Returns all exiting blocks with a countable exit, i.e. the
+ /// exit-not-taken count is known exactly at compile time.
+ const SmallVector<BasicBlock *, 4> &getCountableExitingBlocks() const {
+ return CountableExitingBlocks;
+ }
+
+ /// Returns all the exiting blocks with an uncountable exit.
+ const SmallVector<BasicBlock *, 4> &getUncountableExitingBlocks() const {
+ return UncountableExitingBlocks;
+ }
+
+ /// Returns all the exit blocks from uncountable exiting blocks.
+ SmallVector<BasicBlock *, 4> getUncountableExitBlocks() const {
+ return UncountableExitBlocks;
+ }
+
private:
/// Return true if the pre-header, exiting and latch blocks of \p Lp and all
/// its nested loops are considered legal for vectorization. These legal
@@ -436,7 +470,7 @@ class LoopVectorizationLegality {
/// we read and write from memory. This method checks if it is
/// legal to vectorize the code, considering only memory constrains.
/// Returns true if the loop is vectorizable
- bool canVectorizeMemory();
+ bool canVectorizeMemory(bool IsEarlyExitLoop);
/// Return true if we can vectorize this loop using the IF-conversion
/// transformation.
@@ -446,6 +480,10 @@ class LoopVectorizationLegality {
/// specific checks for outer loop vectorization.
bool canVectorizeOuterLoop();
+ /// Returns true if this is a supported early exit loop that we can
+ /// vectorize.
+ bool isVectorizableEarlyExitLoop();
+
/// Return true if all of the instructions in the block can be speculatively
/// executed, and record the loads/stores that require masking.
/// \p SafePtrs is a list of addresses that are known to be legal and we know
@@ -551,6 +589,16 @@ class LoopVectorizationLegality {
/// (potentially) make a better decision on the maximum VF and enable
/// the use of those function variants.
bool VecCallVariantsFound = false;
+
+ /// Indicates whether this loop has a speculative early exit, i.e. an
+ /// uncountable exiting block that is not the latch.
+ bool HasSpeculativeEarlyExit = false;
+
+ /// Keeps track of all the exits with known or countable exit-not-taken
+ /// counts.
+ SmallVector<BasicBlock *, 4> CountableExitingBlocks;
+ SmallVector<BasicBlock *, 4> UncountableExitingBlocks;
+ SmallVector<BasicBlock *, 4> UncountableExitBlocks;
};
} // namespace llvm
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 980f142f113265..bf367a77aae1ce 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2445,8 +2445,11 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
// vectorize a loop if it contains known function calls that don't set
// the flag. Therefore, it is safe to ignore this read from memory.
auto *Call = dyn_cast<CallInst>(&I);
- if (Call && getVectorIntrinsicIDForCall(Call, TLI))
- continue;
+ if (Call) {
+ NumCalls++;
+ if (getVectorIntrinsicIDForCall(Call, TLI))
+ continue;
+ }
// If this is a load, save it. If this instruction can read from memory
// but is not a load, then we quit. Notice that we don't handle function
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 7062e21383a5fc..64f1d4187f1b05 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1051,7 +1051,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return true;
}
-bool LoopVectorizationLegality::canVectorizeMemory() {
+bool LoopVectorizationLegality::canVectorizeMemory(bool IsEarlyExitLoop) {
LAI = &LAIs.getInfo(*TheLoop);
const OptimizationRemarkAnalysis *LAR = LAI->getReport();
if (LAR) {
@@ -1073,6 +1073,50 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return false;
}
+ // For loops with uncountable early exiting blocks that are not the latch
+ // it's necessary to perform extra checks, since the vectoriser is currently
+ // only capable of handling simple search loops.
+ if (IsEarlyExitLoop) {
+ // We don't support calls or any memory accesses that write to memory.
+ if (LAI->getNumStores()) {
+ reportVectorizationFailure(
+ "Writes to memory unsupported in early exit loops",
+ "Cannot vectorize early exit loop with writes to memory",
+ "WritesInEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+
+ if (LAI->getNumCalls()) {
+ reportVectorizationFailure(
+ "Calls unsupported in early exit loops",
+ "Cannot vectorize early exit loop with function calls",
+ "CallsInEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+
+ // The vectoriser cannot handle loads that occur after the early exit block.
+ BasicBlock *LatchBB = TheLoop->getLoopLatch();
+ for (Instruction &I : *LatchBB) {
+ if (I.mayReadFromMemory()) {
+ reportVectorizationFailure(
+ "Loads not permitted after early exit",
+ "Cannot vectorize early exit loop with loads after early exit",
+ "LoadsAfterEarlyExit", ORE, TheLoop);
+ return false;
+ }
+ }
+
+ // The vectoriser does not yet handle loops that may fault, but this will
+ // be improved in a follow-on patch.
+ if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC)) {
+ reportVectorizationFailure(
+ "Loop may fault",
+ "Cannot vectorize potentially faulting early exit loop",
+ "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+ }
+
// We can vectorize stores to invariant address when final reduction value is
// guaranteed to be stored at the end of the loop. Also, if decision to
// vectorize loop is made, runtime checks are added so as to make sure that
@@ -1445,6 +1489,95 @@ bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
return Result;
}
+bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
+ // At least one of the exiting blocks must be the latch.
+ BasicBlock *LatchBB = TheLoop->getLoopLatch();
+ if (!LatchBB) {
+ reportVectorizationFailure("Loop does not have a latch",
+ "Cannot vectorize early exit loop",
+ "NoLatchEarlyExit", ORE, TheLoop);
+ return false;
+ }
+
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ TheLoop->getExitingBlocks(ExitingBlocks);
+
+ // Keep a record of all the exiting blocks with exact exit counts, as well as
+ // those with inexact counts.
+ SmallVector<const SCEVPredicate *, 4> Predicates;
+ for (BasicBlock *BB1 : ExitingBlocks) {
+ const SCEV *EC =
+ PSE.getSE()->getPredicatedExitCount(TheLoop, BB1, &Predicates);
+ if (isa<SCEVCouldNotCompute>(EC)) {
+ UncountableExitingBlocks.push_back(BB1);
+
+ unsigned NumExitBlocks = 0;
+ for (BasicBlock *BB2 : successors(BB1)) {
+ if (!TheLoop->contains(BB2)) {
+ UncountableExitBlocks.push_back(BB2);
+ NumExitBlocks++;
+ }
+ }
+ if (NumExitBlocks > 1) {
+ reportVectorizationFailure(
+ "Early exiting block has more than one successor outside of loop",
+ "Too many successors from early exiting block",
+ "EarlyExitTooManySuccessors", ORE, TheLoop);
+ return false;
+ }
+ } else
+ CountableExitingBlocks.push_back(BB1);
+ }
+ Predicates.clear();
+
+ // We only support one uncountable early exit.
+ if (getUncountableExitingBlocks().size() != 1) {
+ reportVectorizationFailure(
+ "Loop has too many uncountable exits",
+ "Cannot vectorize early exit loop with more than one early exit",
+ "TooManyUncountableEarlyExits", ORE, TheLoop);
+ return false;
+ }
+
+ // The only supported early exit loops so far are ones where the early
+ // exiting block is a unique predecessor of the latch block.
+ BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor();
+ if (!LatchPredBB || LatchPredBB != getUncountableExitingBlocks()[0]) {
+ reportVectorizationFailure("Early exit is not the latch predecessor",
+ "Cannot vectorize early exit loop",
+ "EarlyExitNotLatchPredecessor", ORE, TheLoop);
+ return false;
+ }
+
+ if (Reductions.size() || FixedOrderRecurrences.size()) {
+ reportVectorizationFailure(
+ "Found reductions or recurrences in early-exit loop",
+ "Cannot vectorize early exit loop with reductions or recurrences",
+ "RecurrencesInEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Found an early exit. Retrying with speculative exit count.\n");
+ if (isa<SCEVCouldNotCompute>(
+ PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
+ reportVectorizationFailure(
+ "Cannot determine exact exit count for latch block",
+ "Cannot vectorize early exit loop",
+ "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+
+ const SCEV *SpecExitCount = PSE.getSymbolicMaxBackedgeTakenCount();
+ assert(!isa<SCEVCouldNotCompute>(SpecExitCount) &&
+ "Failed to get symbolic expression for backedge taken count");
+
+ LLVM_DEBUG(dbgs() << "LV: Found speculative backedge taken count: "
+ << *SpecExitCount << '\n');
+ return true;
+}
+
bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
// Store the result and return it at the end instead of exiting early, in case
// allowExtraAnalysis is used to report multiple reasons for not vectorizing.
@@ -1505,19 +1638,20 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return false;
}
- // Go over each instruction and look at memory deps.
- if (!canVectorizeMemory()) {
- LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
+ HasSpeculativeEarlyExit = false;
+ if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
+ if (!isVectorizableEarlyExitLoop()) {
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
+ } else
+ HasSpeculativeEarlyExit = true;
}
- if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
- reportVectorizationFailure("could not determine number of loop iterations",
- "could not determine number of loop iterations",
- "CantComputeNumberOfIterations", ORE, TheLoop);
+ // Go over each instruction and look at memory deps.
+ if (!canVectorizeMemory(HasSpeculativeEarlyExit)) {
+ LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
if (DoExtraAnalysis)
Result = false;
else
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0fa7c2add1faa2..fcdfdbe5e6cd52 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9815,6 +9815,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}
+ if (LVL.hasSpeculativeEarlyExit()) {
+ LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Auto-vectorization of early "
+ << "exit loops is not yet supported.\n");
+ return false;
+ }
+
// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
// even evaluating whether vectorization is profitable. Since we cannot modify
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
new file mode 100644
index 00000000000000..d677598399f1c2
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
@@ -0,0 +1,1584 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S < %s -p loop-vectorize -mattr=+sve -debug-only=loop-vectorize \
+; RUN: -mtriple aarch64-linux-gnu 2>%t | FileCheck %s --check-prefixes=CHECK
+; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
+
+declare void @init_mem(ptr, i64);
+
+define i64 @same_exit_block_pre_inc_use1() {
+; DEBUG-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1'
+; DEBUG: LV: Found an early exit. Retrying with speculative exit count.
+; DEBUG-NEXT: LV: Found speculative backedge taken count: 63
+; DEBUG-NEXT: LV: We can vectorize this loop!
+; DEBUG-NEXT: LV: Not vectorizing: Auto-vectorization of early exit loops is not yet supported.
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LAND_RHS:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LAND_RHS]], label [[FOR_END_LOOPEXIT]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ], [ 67, [[FOR_INC]] ]
+; CHECK-NEXT: ret i64 [[START_0_LCSSA]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_gep_two_indices() {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_gep_two_indices(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P1]], i64 0, i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P2]], i64 0, i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds [1024 x i8], ptr %p1, i64 0, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds [1024 x i8], ptr %p2, i64 0, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_alloca_diff_type(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [40 x i32], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [40 x i32], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LAND_RHS:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LAND_RHS]], label [[FOR_END_LOOPEXIT]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ], [ 67, [[FOR_INC]] ]
+; CHECK-NEXT: ret i64 [[START_0_LCSSA]]
+;
+entry:
+ %p1 = alloca [40 x i32]
+ %p2 = alloca [40 x i32]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use2() {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use2(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 67, %loop ], [ %index, %loop.inc ]
+ ret i64 %retval
+}
+
+define i64 @same_exit_block_pre_inc_use3() {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use3(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[INDEX]], [[LOOP]] ]
+; CHECK-NEXT: ret i64 [[INDEX_LCSSA]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ ret i64 %index
+}
+
+
+; In this example the early exit block appears in the list of ExitNotTaken
+; SCEVs, but is not computable.
+define i64 @same_exit_block_pre_inc_use4() {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use4(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i64], align 8
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i64], align 8
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i64, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i64 [[INDEX]], [[LD1]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i64]
+ %p2 = alloca [1024 x i64]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i64, ptr %p1, i64 %index
+ %ld1 = load i64, ptr %arrayidx, align 1
+ %cmp3 = icmp ult i64 %index, %ld1
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+
+define i64 @same_exit_block_post_inc_use() {
+; CHECK-LABEL: define i64 @same_exit_block_post_inc_use(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[INDEX_NEXT]], [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ %index.next, %loop.inc ]
+ ret i64 %retval
+}
+
+define i64 @same_exit_block_post_inc_use2() {
+; CHECK-LABEL: define i64 @same_exit_block_post_inc_use2(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %index.next = add i64 %index, 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index.next, %loop ], [ %index, %loop.inc ]
+ ret i64 %retval
+}
+
+define i64 @same_exit_block_phi_of_consts() {
+; CHECK-LABEL: define i64 @same_exit_block_phi_of_consts(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 0, %loop ], [ 1, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @diff_exit_block_pre_inc_use1() {
+; CHECK-LABEL: define i64 @diff_exit_block_pre_inc_use1(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]]
+; CHECK: loop.early.exit:
+; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ]
+; CHECK-NEXT: ret i64 [[RETVAL1]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL2]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.early.exit
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.early.exit:
+ %retval1 = phi i64 [ %index, %loop ]
+ ret i64 %retval1
+
+loop.end:
+ %retval2 = phi i64 [ 67, %loop.inc ]
+ ret i64 %retval2
+}
+
+define i64 @diff_exit_block_pre_inc_use2() {
+; CHECK-LABEL: define i64 @diff_exit_block_pre_inc_use2(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]]
+; CHECK: loop.early.exit:
+; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP]] ]
+; CHECK-NEXT: ret i64 [[RETVAL1]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL2]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.early.exit
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.early.exit:
+ %retval1 = phi i64 [ 67, %loop ]
+ ret i64 %retval1
+
+loop.end:
+ %retval2 = phi i64 [ %index, %loop.inc ]
+ ret i64 %retval2
+}
+
+define i64 @diff_exit_block_pre_inc_use3() {
+; CHECK-LABEL: define i64 @diff_exit_block_pre_inc_use3(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]]
+; CHECK: loop.early.exit:
+; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ]
+; CHECK-NEXT: ret i64 [[INDEX_LCSSA]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[INDEX_LCSSA1:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[INDEX_LCSSA1]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.early.exit
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.early.exit:
+ ret i64 %index
+
+loop.end:
+ ret i64 %index
+}
+
+
+define i64 @diff_exit_block_phi_of_consts() {
+; CHECK-LABEL: define i64 @diff_exit_block_phi_of_consts(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]]
+; CHECK: loop.early.exit:
+; CHECK-NEXT: ret i64 0
+; CHECK: loop.end:
+; CHECK-NEXT: ret i64 1
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.early.exit
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.early.exit:
+ ret i64 0
+
+loop.end:
+ ret i64 1
+}
+
+
+define i64 @diff_exit_block_post_inc_use1() {
+; CHECK-LABEL: define i64 @diff_exit_block_post_inc_use1(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]]
+; CHECK: loop.early.exit:
+; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ]
+; CHECK-NEXT: ret i64 [[RETVAL1]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL2]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.early.exit
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.early.exit:
+ %retval1 = phi i64 [ %index, %loop ]
+ ret i64 %retval1
+
+loop.end:
+ %retval2 = phi i64 [ %index.next, %loop.inc ]
+ ret i64 %retval2
+}
+
+
+define i64 @diff_exit_block_post_inc_use2() {
+; CHECK-LABEL: define i64 @diff_exit_block_post_inc_use2(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]]
+; CHECK: loop.early.exit:
+; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ]
+; CHECK-NEXT: ret i64 [[RETVAL1]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL2]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %index.next = add i64 %index, 1
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.early.exit
+
+loop.inc:
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.early.exit:
+ %retval1 = phi i64 [ %index.next, %loop ]
+ ret i64 %retval1
+
+loop.end:
+ %retval2 = phi i64 [ %index, %loop.inc ]
+ ret i64 %retval2
+}
+
+
+; The early exit (i.e. unknown exit-not-taken count) is the latch - we don't
+; support this yet.
+define i64 @early_exit_on_last_block() {
+; DEBUG-LABEL: LV: Checking a loop in 'early_exit_on_last_block'
+; DEBUG: LV: Not vectorizing: Early exit is not the latch predecessor.
+; CHECK-LABEL: define i64 @early_exit_on_last_block(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LAND_RHS:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[SEARCH:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH]], label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK: search:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP41]], [[TMP42]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_END_LOOPEXIT]], label [[LAND_RHS]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ 64, [[LAND_RHS]] ], [ [[INDEX]], [[SEARCH]] ]
+; CHECK-NEXT: ret i64 [[START_0_LCSSA]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %search ], [ 3, %entry ]
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %search, label %loop.end
+
+search:
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.end, label %loop
+
+loop.end:
+ %retval = phi i64 [ 64, %loop ], [ %index, %search ]
+ ret i64 %retval
+}
+
+
+; There are multiple exit blocks - two of them have an exact representation for the
+; exit-not-taken counts and the other is unknown, i.e. the "early exit".
+define i64 @multiple_exits_one_early() {
+; CHECK-LABEL: define i64 @multiple_exits_one_early(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64
+; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]]
+; CHECK: search:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END]], label [[LOOP_INC]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 64, [[LOOP]] ], [ [[INDEX]], [[SEARCH]] ], [ 128, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %cmp1 = icmp ne i64 %index, 64
+ br i1 %cmp1, label %search, label %loop.end
+
+search:
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.end, label %loop.inc
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 128
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 64, %loop ], [ %index, %search ], [ 128, %loop.inc ]
+ ret i64 %retval
+}
+
+
+; We don't currently support multiple early exits.
+define i64 @multiple_early_exits() {
+; DEBUG-LABEL: LV: Checking a loop in 'multiple_early_exits'
+; DEBUG: LV: Not vectorizing: Loop has too many uncountable exits.
+; CHECK-LABEL: define i64 @multiple_early_exits(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LAND_RHS:%.*]]
+; CHECK: search1:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC1:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP41]], [[TMP42]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_INC:%.*]]
+; CHECK: search2:
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i8 [[TMP41]], 34
+; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_END_LOOPEXIT]], label [[FOR_INC1]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LAND_RHS]], label [[FOR_END_LOOPEXIT]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ], [ 100, [[FOR_INC]] ], [ 43, [[FOR_INC1]] ]
+; CHECK-NEXT: ret i64 [[START_0_LCSSA]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %search1
+
+search1:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp1 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp1, label %loop.end, label %search2
+
+search2:
+ %cmp2 = icmp ult i8 %ld1, 34
+ br i1 %cmp2, label %loop.end, label %loop.inc
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %search1, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %search1 ], [ 100, %search2 ], [ 43, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @early_exit_infinite_loop() {
+; DEBUG-LABEL: LV: Checking a loop in 'early_exit_infinite_loop'
+; DEBUG: LV: Not vectorizing: Cannot determine exact exit count for latch block.
+; CHECK-LABEL: define i64 @early_exit_infinite_loop(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LAND_RHS:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br label [[LAND_RHS]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ]
+; CHECK-NEXT: ret i64 [[START_0_LCSSA]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br label %loop
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use_inv_cond(i1 %cond) {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use_inv_cond(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: [[CMP4:%.*]] = select i1 [[COND]], i1 [[CMP3]], i1 false
+; CHECK-NEXT: br i1 [[CMP4]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ %cmp4 = select i1 %cond, i1 %cmp3, i1 false
+ br i1 %cmp4, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_reverse() {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_reverse(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 1023, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], -1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_END]], label [[LOOP]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 1024, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 1023, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, -1
+ %exitcond = icmp eq i64 %index.next, 0
+ br i1 %exitcond, label %loop.end, label %loop
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 1024, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_with_reduction() {
+; DEBUG-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_with_reduction'
+; DEBUG: LV: Not vectorizing: Found reductions or recurrences in early-exit loop.
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_with_reduction(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LAND_RHS:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[RED_NEXT:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[LD2_ZEXT:%.*]] = zext i8 [[TMP39]] to i64
+; CHECK-NEXT: [[RED_NEXT]] = add i64 [[RED]], [[LD2_ZEXT]]
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LAND_RHS]], label [[FOR_END_LOOPEXIT]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], [[FOR_INC]] ], [ [[RED_NEXT]], [[LAND_RHS]] ]
+; CHECK-NEXT: [[FINAL_IND:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ], [ 67, [[FOR_INC]] ]
+; CHECK-NEXT: [[START_0_LCSSA:%.*]] = add i64 [[RED_NEXT_LCSSA]], [[FINAL_IND]]
+; CHECK-NEXT: ret i64 [[START_0_LCSSA]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %red = phi i64 [ %red.next, %loop.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %ld2.zext = zext i8 %ld2 to i64
+ %red.next = add i64 %red, %ld2.zext
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %final.ind = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ %retval = add i64 %red.next, %final.ind
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p1, ptr dereferenceable(1024) %p2) {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_deref_ptrs(
+; CHECK-SAME: ptr dereferenceable(1024) [[P1:%.*]], ptr dereferenceable(1024) [[P2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+; The form of the induction variables requires SCEV predicates.
+; TODO: We should fix isDereferenceableAndAlignedInLoop and
+; getSmallConstantMaxTripCount to cope with SCEV predicates when
+; requesting the small constant max trip count.
+define i32 @diff_exit_block_needs_scev_check(i32 %end) {
+; DEBUG-LABEL: LV: Checking a loop in 'diff_exit_block_needs_scev_check'
+; DEBUG: LV: Found an early exit. Retrying with speculative exit count.
+; DEBUG-NEXT: LV: Found speculative backedge taken count: (-1 + (1 umax (zext i10 (trunc i32 %end to i10) to i32)))<nsw>
+; DEBUG-NEXT: LV: Not vectorizing: Loop may fault.
+; CHECK-LABEL: define i32 @diff_exit_block_needs_scev_check(
+; CHECK-SAME: i32 [[END:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i32], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: [[END_CLAMPED:%.*]] = and i32 [[END]], 1023
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[GEP_IND]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[GEP_IND]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[CMP_EARLY:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: br i1 [[CMP_EARLY]], label [[FOUND:%.*]], label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[IND_NEXT]] = add i8 [[IND]], 1
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32
+; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT:%.*]]
+; CHECK: found:
+; CHECK-NEXT: ret i32 1
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %p1 = alloca [1024 x i32]
+ %p2 = alloca [1024 x i32]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ %end.clamped = and i32 %end, 1023
+ br label %for.body
+
+for.body:
+ %ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ]
+ %gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ]
+ %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %gep.ind
+ %0 = load i32, ptr %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %gep.ind
+ %1 = load i32, ptr %arrayidx2, align 4
+ %cmp.early = icmp eq i32 %0, %1
+ br i1 %cmp.early, label %found, label %for.inc
+
+for.inc:
+ %ind.next = add i8 %ind, 1
+ %conv = zext i8 %ind.next to i32
+ %gep.ind.next = add i64 %gep.ind, 1
+ %cmp = icmp ult i32 %conv, %end.clamped
+ br i1 %cmp, label %for.body, label %exit
+
+found:
+ ret i32 1
+
+exit:
+ ret i32 0
+}
+
+
+declare void @abort()
+
+; This is a variant of an early exit loop where the condition for leaving
+; early is loop invariant.
+define i32 @diff_blocks_invariant_early_exit_cond(ptr %s) {
+; DEBUG-LABEL: LV: Checking a loop in 'diff_blocks_invariant_early_exit_cond'
+; DEBUG: LV: Found an early exit. Retrying with speculative exit count.
+; DEBUG-NEXT: LV: Found speculative backedge taken count: 275
+; DEBUG: LV: Not vectorizing: Auto-vectorization of early exit loops is not yet supported.
+; CHECK-LABEL: define i32 @diff_blocks_invariant_early_exit_cond(
+; CHECK-SAME: ptr [[S:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SVAL:%.*]] = load i32, ptr [[S]], align 4
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[SVAL]], 0
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IND:%.*]] = phi i32 [ -10, [[ENTRY:%.*]] ], [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT: br i1 [[COND]], label [[FOR_INC]], label [[EARLY_EXIT:%.*]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[IND_NEXT]] = add nsw i32 [[IND]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IND_NEXT]], 266
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK: early.exit:
+; CHECK-NEXT: tail call void @abort()
+; CHECK-NEXT: unreachable
+; CHECK: for.end:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %sval = load i32, ptr %s, align 4
+ %cond = icmp eq i32 %sval, 0
+ br label %for.body
+
+for.body:
+ %ind = phi i32 [ -10, %entry ], [ %ind.next, %for.inc ]
+ br i1 %cond, label %for.inc, label %early.exit
+
+for.inc:
+ %ind.next = add nsw i32 %ind, 1
+ %exitcond.not = icmp eq i32 %ind.next, 266
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+early.exit:
+ tail call void @abort()
+ unreachable
+
+for.end:
+ ret i32 0
+}
+
+
+define i64 @early_exit_has_multiple_outside_successors() {
+; DEBUG-LABEL: LV: Checking a loop in 'early_exit_has_multiple_outside_successors'
+; DEBUG: LV: Not vectorizing: Loop contains an unsupported switch
+; CHECK-LABEL: define i64 @early_exit_has_multiple_outside_successors(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: switch i8 [[LD1]], label [[LOOP_INC]] [
+; CHECK-NEXT: i8 2, label [[LOOP_END:%.*]]
+; CHECK-NEXT: i8 3, label [[LOOP_SURPRISE:%.*]]
+; CHECK-NEXT: ]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.surprise:
+; CHECK-NEXT: ret i64 3
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ switch i8 %ld1, label %loop.inc [
+ i8 2, label %loop.end
+ i8 3, label %loop.surprise
+ ]
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.surprise:
+ ret i64 3
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_too_small_allocas() {
+; DEBUG-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_too_small_allocas'
+; DEBUG: LV: Not vectorizing: Loop may fault.
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [42 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [42 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [42 x i8]
+ %p2 = alloca [42 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(
+; CHECK-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index ac33f6e3e6f728..99911b251c81e1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -12,7 +12,7 @@
; }
; }
; File, line, and column should match those specified in the metadata
-; CHECK: remark: source.cpp:5:9: loop not vectorized: could not determine number of loop iterations
+; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop
; CHECK: remark: source.cpp:5:9: loop not vectorized
; void test_disabled(int *A, int Length) {
@@ -46,12 +46,12 @@
; YAML: --- !Analysis
; YAML-NEXT: Pass: loop-vectorize
-; YAML-NEXT: Name: CantComputeNumberOfIterations
+; YAML-NEXT: Name: EarlyExitNotLatchPredecessor
; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 5, Column: 9 }
; YAML-NEXT: Function: _Z4testPii
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'loop not vectorized: '
-; YAML-NEXT: - String: could not determine number of loop iterations
+; YAML-NEXT: - String: Cannot vectorize early exit loop
; YAML-NEXT: ...
; YAML-NEXT: --- !Missed
; YAML-NEXT: Pass: loop-vectorize
@@ -117,12 +117,12 @@
; YAML-NEXT: ...
; YAML-NEXT: --- !Analysis
; YAML-NEXT: Pass: loop-vectorize
-; YAML-NEXT: Name: CantComputeNumberOfIterations
+; YAML-NEXT: Name: EarlyExitNotLatchPredecessor
; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
; YAML-NEXT: Function: test_multiple_failures
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'loop not vectorized: '
-; YAML-NEXT: - String: could not determine number of loop iterations
+; YAML-NEXT: - String: Cannot vectorize early exit loop
; YAML-NEXT: ...
; YAML: --- !Missed
; YAML-NEXT: Pass: loop-vectorize
diff --git a/llvm/test/Transforms/LoopVectorize/control-flow.ll b/llvm/test/Transforms/LoopVectorize/control-flow.ll
index a27f2f0841bca8..3a8aec34dfe43e 100644
--- a/llvm/test/Transforms/LoopVectorize/control-flow.ll
+++ b/llvm/test/Transforms/LoopVectorize/control-flow.ll
@@ -10,7 +10,7 @@
; return 0;
; }
-; CHECK: remark: source.cpp:5:9: loop not vectorized: could not determine number of loop iterations
+; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with writes to memory
; CHECK: remark: source.cpp:5:9: loop not vectorized
; CHECK: _Z4testPii
diff --git a/llvm/test/Transforms/LoopVectorize/remarks-multi-exit-loops.ll b/llvm/test/Transforms/LoopVectorize/remarks-multi-exit-loops.ll
index 2a5240e73c6f8b..46a4592d0208ee 100644
--- a/llvm/test/Transforms/LoopVectorize/remarks-multi-exit-loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/remarks-multi-exit-loops.ll
@@ -3,7 +3,7 @@
; Make sure LV does not crash when generating remarks for loops with non-unique
; exit blocks.
define i32 @test_non_unique_exit_blocks(ptr nocapture readonly align 4 dereferenceable(1024) %data, i32 %x) {
-; CHECK: loop not vectorized: could not determine number of loop iterations
+; CHECK: loop not vectorized: Cannot vectorize early exit loop
;
entry:
br label %for.header
>From d0cf2c64114618173e4ca7ea92753c1247875010 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Tue, 10 Sep 2024 08:55:51 +0000
Subject: [PATCH 2/3] Address review comments
* Reverted LoopAccessAnalysis changes in favour of new code in
LoopVectorizationLegality.cpp that checks if instructions are
safe to speculatively execute.
* Updated some comments and done minor code refactoring.
* Added new tests for demonstrating under which circumstances
we can speculatively execute calls and divides.
* Added new test where the early exit is in a conditional block.
* Added new test containing store instructions.
* Added new test containing loads after early exit.
---
.../llvm/Analysis/LoopAccessAnalysis.h | 4 +-
.../Vectorize/LoopVectorizationLegality.h | 8 +-
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 7 +-
.../Vectorize/LoopVectorizationLegality.cpp | 86 ++--
.../AArch64/simple_early_exit.ll | 384 ++++++++++++++++++
5 files changed, 450 insertions(+), 39 deletions(-)
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 8d7eb0cb3e87e8..a35bc7402d1a89 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -661,8 +661,7 @@ class LoopAccessInfo {
bool isInvariant(Value *V) const;
unsigned getNumStores() const { return NumStores; }
- unsigned getNumLoads() const { return NumLoads; }
- unsigned getNumCalls() const { return NumCalls; }
+ unsigned getNumLoads() const { return NumLoads;}
/// The diagnostics report generated for the analysis. E.g. why we
/// couldn't analyze the loop.
@@ -755,7 +754,6 @@ class LoopAccessInfo {
unsigned NumLoads = 0;
unsigned NumStores = 0;
- unsigned NumCalls = 0;
/// Cache the result of analyzeLoop.
bool CanVecMem = false;
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index bb21f97c0ae521..ab40f39ea15085 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -480,8 +480,7 @@ class LoopVectorizationLegality {
/// specific checks for outer loop vectorization.
bool canVectorizeOuterLoop();
- /// Returns true if this is a supported early exit loop that we can
- /// vectorize.
+ /// Returns true if this is an early exit loop that can be vectorized.
bool isVectorizableEarlyExitLoop();
/// Return true if all of the instructions in the block can be speculatively
@@ -594,10 +593,11 @@ class LoopVectorizationLegality {
/// uncountable exiting block that is not the latch.
bool HasSpeculativeEarlyExit = false;
- /// Keeps track of all the exits with known or countable exit-not-taken
- /// counts.
+ /// Keep track of all the loop exiting blocks.
SmallVector<BasicBlock *, 4> CountableExitingBlocks;
SmallVector<BasicBlock *, 4> UncountableExitingBlocks;
+
+ /// Keep track of the destinations of all uncountable exits.
SmallVector<BasicBlock *, 4> UncountableExitBlocks;
};
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index bf367a77aae1ce..980f142f113265 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2445,11 +2445,8 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
// vectorize a loop if it contains known function calls that don't set
// the flag. Therefore, it is safe to ignore this read from memory.
auto *Call = dyn_cast<CallInst>(&I);
- if (Call) {
- NumCalls++;
- if (getVectorIntrinsicIDForCall(Call, TLI))
- continue;
- }
+ if (Call && getVectorIntrinsicIDForCall(Call, TLI))
+ continue;
// If this is a load, save it. If this instruction can read from memory
// but is not a load, then we quit. Notice that we don't handle function
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 64f1d4187f1b05..3229bc2f5e6718 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1086,16 +1086,12 @@ bool LoopVectorizationLegality::canVectorizeMemory(bool IsEarlyExitLoop) {
return false;
}
- if (LAI->getNumCalls()) {
- reportVectorizationFailure(
- "Calls unsupported in early exit loops",
- "Cannot vectorize early exit loop with function calls",
- "CallsInEarlyExitLoop", ORE, TheLoop);
- return false;
- }
-
// The vectoriser cannot handle loads that occur after the early exit block.
BasicBlock *LatchBB = TheLoop->getLoopLatch();
+ assert(LatchBB->getUniquePredecessor() ==
+ getUncountableExitingBlocks()[0] &&
+ "Expected latch predecessor to be the early exiting block");
+
for (Instruction &I : *LatchBB) {
if (I.mayReadFromMemory()) {
reportVectorizationFailure(
@@ -1104,10 +1100,15 @@ bool LoopVectorizationLegality::canVectorizeMemory(bool IsEarlyExitLoop) {
"LoadsAfterEarlyExit", ORE, TheLoop);
return false;
}
+ // Any other problematic instructions should have been caught earlier.
+ assert(!I.mayWriteToMemory() && !I.mayThrow() &&
+ !I.mayHaveSideEffects() &&
+ "Unexpected instructions in latch block of early exit loop");
}
// The vectoriser does not yet handle loops that may fault, but this will
// be improved in a follow-on patch.
+ // TODO: Handle loops that may fault.
if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC)) {
reportVectorizationFailure(
"Loop may fault",
@@ -1499,11 +1500,18 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
return false;
}
+ if (Reductions.size() || FixedOrderRecurrences.size()) {
+ reportVectorizationFailure(
+ "Found reductions or recurrences in early-exit loop",
+ "Cannot vectorize early exit loop with reductions or recurrences",
+ "RecurrencesInEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+
SmallVector<BasicBlock *, 8> ExitingBlocks;
TheLoop->getExitingBlocks(ExitingBlocks);
- // Keep a record of all the exiting blocks with exact exit counts, as well as
- // those with inexact counts.
+ // Keep a record of all the exiting blocks.
SmallVector<const SCEVPredicate *, 4> Predicates;
for (BasicBlock *BB1 : ExitingBlocks) {
const SCEV *EC =
@@ -1511,20 +1519,23 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
if (isa<SCEVCouldNotCompute>(EC)) {
UncountableExitingBlocks.push_back(BB1);
- unsigned NumExitBlocks = 0;
- for (BasicBlock *BB2 : successors(BB1)) {
- if (!TheLoop->contains(BB2)) {
- UncountableExitBlocks.push_back(BB2);
- NumExitBlocks++;
- }
- }
- if (NumExitBlocks > 1) {
+ SmallVector<BasicBlock *, 2> Succs(successors(BB1));
+ if (Succs.size() != 2) {
reportVectorizationFailure(
- "Early exiting block has more than one successor outside of loop",
- "Too many successors from early exiting block",
+ "Early exiting block does not have exactly two successors",
+ "Incorrect number of successors from early exiting block",
"EarlyExitTooManySuccessors", ORE, TheLoop);
return false;
}
+
+ BasicBlock *BB2;
+ if (!TheLoop->contains(Succs[0]))
+ BB2 = Succs[0];
+ else {
+ assert(!TheLoop->contains(Succs[1]));
+ BB2 = Succs[1];
+ }
+ UncountableExitBlocks.push_back(BB2);
} else
CountableExitingBlocks.push_back(BB1);
}
@@ -1549,13 +1560,34 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
return false;
}
- if (Reductions.size() || FixedOrderRecurrences.size()) {
- reportVectorizationFailure(
- "Found reductions or recurrences in early-exit loop",
- "Cannot vectorize early exit loop with reductions or recurrences",
- "RecurrencesInEarlyExitLoop", ORE, TheLoop);
- return false;
- }
+ // Check all instructions in the loop to see if they could potentially
+ // generate exceptions or have side-effects.
+ auto IsSafeOperation = [](Instruction *I) -> bool {
+ // Is this a divide?
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::PHI:
+ case Instruction::Br:
+ // These are checked separately. For example, canVectorizeMemory will
+ // analyze the loads and stores in the loop.
+ return true;
+ default:
+ return isSafeToSpeculativelyExecute(I);
+ }
+ };
+
+ for (auto *BB : TheLoop->blocks())
+ for (auto &I : *BB)
+ if (!IsSafeOperation(&I)) {
+ reportVectorizationFailure("Early exit loop contains operations that "
+ "cannot be speculatively executed",
+ "Early exit loop contains operations that "
+ "cannot be speculatively executed",
+ "UnsafeOperationsEarlyExitLoop", ORE,
+ TheLoop);
+ return false;
+ }
LLVM_DEBUG(
dbgs()
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
index d677598399f1c2..bfa00ebed0d0af 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
@@ -1106,6 +1106,383 @@ loop.end:
}
+define i64 @loop_contains_safe_call() {
+; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_safe_call'
+; DEBUG: LV: Found an early exit. Retrying with speculative exit count.
+; DEBUG-NEXT: LV: Found speculative backedge taken count: 63
+; DEBUG-NEXT: LV: We can vectorize this loop!
+; CHECK-LABEL: define i64 @loop_contains_safe_call(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[SQRT:%.*]] = tail call fast float @llvm.sqrt.f32(float [[LD1]])
+; CHECK-NEXT: [[CMP:%.*]] = fcmp fast ult float [[SQRT]], 3.000000e+00
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds float, ptr %p1, i64 %index
+ %ld1 = load float, ptr %arrayidx, align 1
+ %sqrt = tail call fast float @llvm.sqrt.f32(float %ld1)
+ %cmp = fcmp fast ult float %sqrt, 3.0e+00
+ br i1 %cmp, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @loop_contains_unsafe_call() {
+; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_unsafe_call'
+; DEBUG: LV: Not vectorizing: Early exit loop contains operations that cannot be speculatively executed.
+; CHECK-LABEL: define i64 @loop_contains_unsafe_call(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[BAD_CALL:%.*]] = call i32 @foo(i32 [[LD1]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[BAD_CALL]], 34
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index
+ %ld1 = load i32, ptr %arrayidx, align 1
+ %bad_call = call i32 @foo(i32 %ld1) #0
+ %cmp = icmp eq i32 %bad_call, 34
+ br i1 %cmp, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @loop_contains_safe_div() {
+; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_safe_div'
+; DEBUG: LV: Found an early exit. Retrying with speculative exit count.
+; DEBUG-NEXT: LV: Found speculative backedge taken count: 63
+; DEBUG-NEXT: LV: We can vectorize this loop!
+; CHECK-LABEL: define i64 @loop_contains_safe_div(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[INDEX_NEXT1:%.*]], [[LOOP_INC1:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX2]]
+; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[LD1]], 20000
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DIV]], 1
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC1]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT1]] = add i64 [[INDEX2]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT1]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX2]], [[LOOP1]] ], [ 67, [[LOOP_INC1]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index
+ %ld1 = load i32, ptr %arrayidx, align 1
+ %div = udiv i32 %ld1, 20000
+ %cmp = icmp eq i32 %div, 1
+ br i1 %cmp, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @loop_contains_unsafe_div() {
+; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_unsafe_div'
+; DEBUG: LV: Not vectorizing: Early exit loop contains operations that cannot be speculatively executed.
+; CHECK-LABEL: define i64 @loop_contains_unsafe_div(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[DIV:%.*]] = udiv i32 20000, [[LD1]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DIV]], 1
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i32, ptr %arrayidx, align 1
+ %div = udiv i32 20000, %ld1
+ %cmp = icmp eq i32 %div, 1
+ br i1 %cmp, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @loop_contains_store(ptr %dest) {
+; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_store'
+; DEBUG: LV: Not vectorizing: Writes to memory unsupported in early exit loops
+; CHECK-LABEL: define i64 @loop_contains_store(
+; CHECK-SAME: ptr [[DEST:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[INDEX]]
+; CHECK-NEXT: store i32 [[LD1]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD1]], 1
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index
+ %ld1 = load i32, ptr %arrayidx, align 1
+ %arrayidx2 = getelementptr inbounds i32, ptr %dest, i64 %index
+ store i32 %ld1, ptr %arrayidx2, align 4
+ %cmp = icmp eq i32 %ld1, 1
+ br i1 %cmp, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @loop_contains_load_after_early_exit(ptr %p2) {
+; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit'
+; DEBUG: LV: Not vectorizing: Loads not permitted after early exit
+; CHECK-LABEL: define i64 @loop_contains_load_after_early_exit(
+; CHECK-SAME: ptr [[P2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD1]], 1
+; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index
+ %ld1 = load i32, ptr %arrayidx, align 1
+ %cmp = icmp eq i32 %ld1, 1
+ br i1 %cmp, label %loop.inc, label %loop.end
+
+loop.inc:
+ %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %index
+ %ld2 = load i32, ptr %arrayidx2, align 1
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @early_exit_in_conditional_block(ptr %mask) {
+; DEBUG-LABEL: LV: Checking a loop in 'early_exit_in_conditional_block'
+; DEBUG: LV: Not vectorizing: Early exit is not the latch predecessor.
+; CHECK-LABEL: define i64 @early_exit_in_conditional_block(
+; CHECK-SAME: ptr [[MASK:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[MASK]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i8 [[LD1]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP_SEARCH:%.*]], label [[LOOP_INC]]
+; CHECK: loop.search:
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD3:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[LD2]], [[LD3]]
+; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP_SEARCH]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx1 = getelementptr inbounds i8, ptr %mask, i64 %index
+ %ld1 = load i8, ptr %arrayidx1, align 1
+ %cmp1 = icmp ne i8 %ld1, 0
+ br i1 %cmp1, label %loop.search, label %loop.inc
+
+loop.search:
+ %arrayidx2 = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld2 = load i8, ptr %arrayidx2, align 1
+ %arrayidx3 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld3 = load i8, ptr %arrayidx3, align 1
+ %cmp2 = icmp eq i8 %ld2, %ld3
+ br i1 %cmp2, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop.search ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
define i64 @same_exit_block_pre_inc_use1_reverse() {
; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_reverse(
; CHECK-SAME: ) #[[ATTR0]] {
@@ -1582,3 +1959,10 @@ loop.end:
%retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
ret i64 %retval
}
+
+
+
+declare i32 @foo(i32)
+declare <vscale x 4 x i32> @foo_vec(<vscale x 4 x i32>)
+
+attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
>From 0087f548d5ab61d06a64be7aafa6e356fd62a078 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Mon, 16 Sep 2024 11:59:59 +0000
Subject: [PATCH 3/3] Address review comments
* Combined changes from canVectorizeMemory into
isVectorizableEarlyExitLoop.
* Documented restrictions for isVectorizableEarlyExitLoop in
header file.
* Added reportVectorizationFailure call in LoopVectorize when
we bail out.
* General code refactor, clean-up.
---
.../Vectorize/LoopVectorizationLegality.h | 17 ++-
.../Vectorize/LoopVectorizationLegality.cpp | 104 ++++++++----------
.../Transforms/Vectorize/LoopVectorize.cpp | 6 +-
.../AArch64/simple_early_exit.ll | 6 +-
4 files changed, 67 insertions(+), 66 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index ab40f39ea15085..e617fdfaedbdd4 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -470,7 +470,7 @@ class LoopVectorizationLegality {
/// we read and write from memory. This method checks if it is
/// legal to vectorize the code, considering only memory constrains.
/// Returns true if the loop is vectorizable
- bool canVectorizeMemory(bool IsEarlyExitLoop);
+ bool canVectorizeMemory();
/// Return true if we can vectorize this loop using the IF-conversion
/// transformation.
@@ -481,6 +481,21 @@ class LoopVectorizationLegality {
bool canVectorizeOuterLoop();
/// Returns true if this is an early exit loop that can be vectorized.
+ /// Currently, a loop with an uncountable early exit is considered
+ /// vectorizable if:
+ /// 1. There are no writes to memory in the loop.
+ /// 2. The loop has only one early uncountable exit
+ /// 3. The early exit block dominates the latch block.
+ /// 4. The latch block has an exact exit count.
+ /// 5. There are no loads after the early exit block.
+ /// 6. The loop does not contain reductions or recurrences.
+ /// 7. We can prove at compile-time that loops will not contain faulting
+ /// loads.
+ /// 8. It is safe to speculatively execute instructions such as divide or
+ /// call instructions.
+ /// The list above is not based on theoretical limitations of vectorization,
+ /// but simply a statement that more work is needed to support these
+ /// additional cases safely.
bool isVectorizableEarlyExitLoop();
/// Return true if all of the instructions in the block can be speculatively
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 3229bc2f5e6718..31ca3bd8ce0889 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1051,7 +1051,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return true;
}
-bool LoopVectorizationLegality::canVectorizeMemory(bool IsEarlyExitLoop) {
+bool LoopVectorizationLegality::canVectorizeMemory() {
LAI = &LAIs.getInfo(*TheLoop);
const OptimizationRemarkAnalysis *LAR = LAI->getReport();
if (LAR) {
@@ -1073,51 +1073,6 @@ bool LoopVectorizationLegality::canVectorizeMemory(bool IsEarlyExitLoop) {
return false;
}
- // For loops with uncountable early exiting blocks that are not the latch
- // it's necessary to perform extra checks, since the vectoriser is currently
- // only capable of handling simple search loops.
- if (IsEarlyExitLoop) {
- // We don't support calls or any memory accesses that write to memory.
- if (LAI->getNumStores()) {
- reportVectorizationFailure(
- "Writes to memory unsupported in early exit loops",
- "Cannot vectorize early exit loop with writes to memory",
- "WritesInEarlyExitLoop", ORE, TheLoop);
- return false;
- }
-
- // The vectoriser cannot handle loads that occur after the early exit block.
- BasicBlock *LatchBB = TheLoop->getLoopLatch();
- assert(LatchBB->getUniquePredecessor() ==
- getUncountableExitingBlocks()[0] &&
- "Expected latch predecessor to be the early exiting block");
-
- for (Instruction &I : *LatchBB) {
- if (I.mayReadFromMemory()) {
- reportVectorizationFailure(
- "Loads not permitted after early exit",
- "Cannot vectorize early exit loop with loads after early exit",
- "LoadsAfterEarlyExit", ORE, TheLoop);
- return false;
- }
- // Any other problematic instructions should have been caught earlier.
- assert(!I.mayWriteToMemory() && !I.mayThrow() &&
- !I.mayHaveSideEffects() &&
- "Unexpected instructions in latch block of early exit loop");
- }
-
- // The vectoriser does not yet handle loops that may fault, but this will
- // be improved in a follow-on patch.
- // TODO: Handle loops that may fault.
- if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC)) {
- reportVectorizationFailure(
- "Loop may fault",
- "Cannot vectorize potentially faulting early exit loop",
- "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
- return false;
- }
- }
-
// We can vectorize stores to invariant address when final reduction value is
// guaranteed to be stored at the end of the loop. Also, if decision to
// vectorize loop is made, runtime checks are added so as to make sure that
@@ -1491,7 +1446,6 @@ bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
}
bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
- // At least one of the exiting blocks must be the latch.
BasicBlock *LatchBB = TheLoop->getLoopLatch();
if (!LatchBB) {
reportVectorizationFailure("Loop does not have a latch",
@@ -1553,24 +1507,22 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
// The only supported early exit loops so far are ones where the early
// exiting block is a unique predecessor of the latch block.
BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor();
- if (!LatchPredBB || LatchPredBB != getUncountableExitingBlocks()[0]) {
+ if (LatchPredBB != getSpeculativeEarlyExitingBlock()) {
reportVectorizationFailure("Early exit is not the latch predecessor",
"Cannot vectorize early exit loop",
"EarlyExitNotLatchPredecessor", ORE, TheLoop);
return false;
}
- // Check all instructions in the loop to see if they could potentially
- // generate exceptions or have side-effects.
+ // Check to see if there are instructions that could potentially generate
+ // exceptions or have side-effects.
auto IsSafeOperation = [](Instruction *I) -> bool {
- // Is this a divide?
switch (I->getOpcode()) {
case Instruction::Load:
case Instruction::Store:
case Instruction::PHI:
case Instruction::Br:
- // These are checked separately. For example, canVectorizeMemory will
- // analyze the loads and stores in the loop.
+ // These are checked separately.
return true;
default:
return isSafeToSpeculativelyExecute(I);
@@ -1578,8 +1530,15 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
};
for (auto *BB : TheLoop->blocks())
- for (auto &I : *BB)
- if (!IsSafeOperation(&I)) {
+ for (auto &I : *BB) {
+ if (I.mayWriteToMemory()) {
+ // We don't support writes to memory.
+ reportVectorizationFailure(
+ "Writes to memory unsupported in early exit loops",
+ "Cannot vectorize early exit loop with writes to memory",
+ "WritesInEarlyExitLoop", ORE, TheLoop);
+ return false;
+ } else if (!IsSafeOperation(&I)) {
reportVectorizationFailure("Early exit loop contains operations that "
"cannot be speculatively executed",
"Early exit loop contains operations that "
@@ -1588,10 +1547,9 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
TheLoop);
return false;
}
+ }
- LLVM_DEBUG(
- dbgs()
- << "LV: Found an early exit. Retrying with speculative exit count.\n");
+ // At least one of the exiting blocks must be the latch.
if (isa<SCEVCouldNotCompute>(
PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
reportVectorizationFailure(
@@ -1601,6 +1559,34 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
return false;
}
+ // The vectoriser cannot handle loads that occur after the early exit block.
+ assert(LatchBB->getUniquePredecessor() == getSpeculativeEarlyExitingBlock() &&
+ "Expected latch predecessor to be the early exiting block");
+ for (Instruction &I : *LatchBB) {
+ if (I.mayReadFromMemory()) {
+ reportVectorizationFailure(
+ "Loads not permitted after early exit",
+ "Cannot vectorize early exit loop with loads after early exit",
+ "LoadsAfterEarlyExit", ORE, TheLoop);
+ return false;
+ }
+ // Any other problematic instructions should have been caught earlier.
+ assert(!I.mayWriteToMemory() && !I.mayThrow() && !I.mayHaveSideEffects() &&
+ "Unexpected instructions in latch block of early exit loop");
+ }
+
+ // TODO: Handle loops that may fault.
+ if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC)) {
+ reportVectorizationFailure(
+ "Loop may fault",
+ "Cannot vectorize potentially faulting early exit loop",
+ "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Found an early exit. Retrying with speculative exit count.\n");
const SCEV *SpecExitCount = PSE.getSymbolicMaxBackedgeTakenCount();
assert(!isa<SCEVCouldNotCompute>(SpecExitCount) &&
"Failed to get symbolic expression for backedge taken count");
@@ -1682,7 +1668,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
}
// Go over each instruction and look at memory deps.
- if (!canVectorizeMemory(HasSpeculativeEarlyExit)) {
+ if (!canVectorizeMemory()) {
LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
if (DoExtraAnalysis)
Result = false;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index fcdfdbe5e6cd52..804bcc51b960bf 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9816,8 +9816,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
}
if (LVL.hasSpeculativeEarlyExit()) {
- LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Auto-vectorization of early "
- << "exit loops is not yet supported.\n");
+ reportVectorizationFailure(
+ "Auto-vectorization of early exit loops is not yet supported.",
+ "Auto-vectorization of early exit loops is not yet supported.",
+ "EarlyExitLoopsUnsupported", ORE, L);
return false;
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
index bfa00ebed0d0af..8c5053d166a6f9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
@@ -1648,9 +1648,7 @@ loop.end:
; requesting the small constant max trip count.
define i32 @diff_exit_block_needs_scev_check(i32 %end) {
; DEBUG-LABEL: LV: Checking a loop in 'diff_exit_block_needs_scev_check'
-; DEBUG: LV: Found an early exit. Retrying with speculative exit count.
-; DEBUG-NEXT: LV: Found speculative backedge taken count: (-1 + (1 umax (zext i10 (trunc i32 %end to i10) to i32)))<nsw>
-; DEBUG-NEXT: LV: Not vectorizing: Loop may fault.
+; DEBUG: LV: Not vectorizing: Loop may fault.
; CHECK-LABEL: define i32 @diff_exit_block_needs_scev_check(
; CHECK-SAME: i32 [[END:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
@@ -1962,7 +1960,7 @@ loop.end:
-declare i32 @foo(i32)
+declare i32 @foo(i32) readonly
declare <vscale x 4 x i32> @foo_vec(<vscale x 4 x i32>)
attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
More information about the llvm-commits
mailing list