[llvm] [LV] Vectorize early exit loops with multiple exits. (PR #174864)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 13 08:16:30 PST 2026
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/174864
From 56d823ea42d03f6ff78360e2d720f65e08b53f7a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 16 Jan 2026 13:34:43 +0000
Subject: [PATCH 01/11] [LV] Vectorize early exit loops with multiple exits.
https://github.com/llvm/llvm-project/pull/174864
---
.../Transforms/Vectorize/LoopVectorize.cpp | 9 -
llvm/lib/Transforms/Vectorize/VPlan.h | 3 +
.../Vectorize/VPlanConstruction.cpp | 35 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 8 +
.../Transforms/Vectorize/VPlanTransforms.cpp | 203 ++++--
.../Transforms/Vectorize/VPlanTransforms.h | 15 +-
.../LoopVectorize/early_exit_legality.ll | 4 +-
.../LoopVectorize/multiple-early-exits.ll | 686 +++++++++++++-----
.../uncountable-early-exit-vplan.ll | 59 +-
.../LoopVectorize/unsupported_early_exit.ll | 61 +-
10 files changed, 720 insertions(+), 363 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f7a0a12a990c5..52acf885146c1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9780,15 +9780,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
"UncountableEarlyExitLoopsDisabled", ORE, L);
return false;
}
- SmallVector<BasicBlock *, 8> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
- // TODO: Support multiple uncountable early exits.
- if (ExitingBlocks.size() - LVL.getCountableExitingBlocks().size() > 1) {
- reportVectorizationFailure("Auto-vectorization of loops with multiple "
- "uncountable early exits is not yet supported",
- "MultipleUncountableEarlyExits", ORE, L);
- return false;
- }
}
if (!LVL.getPotentiallyFaultingLoads().empty()) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 94a19beb75a8f..0c068101e6919 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1398,6 +1398,9 @@ class VPPhiAccessors {
/// Returns the incoming block with index \p Idx.
const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
+ /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
+ VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;
+
/// Returns the number of incoming values, also number of incoming blocks.
virtual unsigned getNumIncoming() const {
return getAsRecipe()->getNumOperands();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 1f8243d5f6c72..30745f90d889d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -873,33 +873,28 @@ void VPlanTransforms::handleEarlyExits(VPlan &Plan,
auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);
- // Disconnect all early exits from the loop leaving it with a single exit from
- // the latch. Early exits that are countable are left for a scalar epilog. The
- // condition of uncountable early exits (currently at most one is supported)
- // is fused into the latch exit, and used to branch from middle block to the
- // early exit destination.
- [[maybe_unused]] bool HandledUncountableEarlyExit = false;
+ // Disconnect countable early exits from the loop, leaving it with a single
+ // exit from the latch. Countable early exits are left for a scalar epilog.
+ // When there are uncountable early exits, skip this loop entirely - they are
+ // handled separately in handleUncountableEarlyExits.
for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
- if (Pred == MiddleVPBB)
+ if (Pred == MiddleVPBB || HasUncountableEarlyExit)
continue;
- if (HasUncountableEarlyExit) {
- assert(!HandledUncountableEarlyExit &&
- "can handle exactly one uncountable early exit");
- handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan,
- cast<VPBasicBlock>(HeaderVPB), LatchVPBB);
- HandledUncountableEarlyExit = true;
- } else {
- for (VPRecipeBase &R : EB->phis())
- cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
- }
- cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
+
+ // Remove phi operands for the early exiting block.
+ for (VPRecipeBase &R : EB->phis())
+ cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
+ auto *EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
+ EarlyExitingVPBB->getTerminator()->eraseFromParent();
VPBlockUtils::disconnectBlocks(Pred, EB);
}
}
- assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) &&
- "missed an uncountable exit that must be handled");
+ if (HasUncountableEarlyExit) {
+ handleUncountableEarlyExits(Plan, cast<VPBasicBlock>(HeaderVPB), LatchVPBB,
+ MiddleVPBB);
+ }
}
void VPlanTransforms::addMiddleCheck(VPlan &Plan,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index b10dd17fbfc89..f28a62eb4059c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1718,6 +1718,14 @@ void VPPhiAccessors::removeIncomingValueFor(VPBlockBase *IncomingBlock) const {
R->removeOperand(Position);
}
+VPValue *
+VPPhiAccessors::getIncomingValueForBlock(const VPBasicBlock *VPBB) const {
+ for (unsigned Idx = 0; Idx != getNumIncoming(); ++Idx)
+ if (getIncomingBlock(Idx) == VPBB)
+ return getIncomingValue(Idx);
+ llvm_unreachable("VPBB is not an incoming block");
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPhiAccessors::printPhiOperands(raw_ostream &O,
VPSlotTracker &SlotTracker) const {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a39b171ab4cd6..dec9cbb465026 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3944,75 +3944,147 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
R->eraseFromParent();
}
-void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
- VPBasicBlock *EarlyExitVPBB,
- VPlan &Plan,
- VPBasicBlock *HeaderVPBB,
- VPBasicBlock *LatchVPBB) {
- auto *MiddleVPBB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[0]);
- if (!EarlyExitVPBB->getSinglePredecessor() &&
- EarlyExitVPBB->getPredecessors()[1] == MiddleVPBB) {
- assert(EarlyExitVPBB->getNumPredecessors() == 2 &&
- EarlyExitVPBB->getPredecessors()[0] == EarlyExitingVPBB &&
- "unsupported early exit VPBB");
- // Early exit operand should always be last phi operand. If EarlyExitVPBB
- // has two predecessors and EarlyExitingVPBB is the first, swap the operands
- // of the phis.
- for (VPRecipeBase &R : EarlyExitVPBB->phis())
- cast<VPIRPhi>(&R)->swapOperands();
- }
+void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
+ VPBasicBlock *HeaderVPBB,
+ VPBasicBlock *LatchVPBB,
+ VPBasicBlock *MiddleVPBB) {
+ struct EarlyExitInfo {
+ VPBasicBlock *EarlyExitingVPBB;
+ VPIRBasicBlock *EarlyExitVPBB;
+ VPValue *CondToExit;
+ };
VPBuilder Builder(LatchVPBB->getTerminator());
- VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
- assert(match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond()) &&
- "Terminator must be be BranchOnCond");
- VPValue *CondOfEarlyExitingVPBB =
- EarlyExitingVPBB->getTerminator()->getOperand(0);
- auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
- ? CondOfEarlyExitingVPBB
- : Builder.createNot(CondOfEarlyExitingVPBB);
-
- // Create a BranchOnTwoConds in the latch that branches to:
- // [0] vector.early.exit, [1] middle block, [2] header (continue looping).
- VPValue *IsEarlyExitTaken =
- Builder.createNaryOp(VPInstruction::AnyOf, {CondToEarlyExit});
- VPBasicBlock *VectorEarlyExitVPBB =
- Plan.createVPBasicBlock("vector.early.exit");
- VectorEarlyExitVPBB->setParent(EarlyExitVPBB->getParent());
-
- VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
-
- // Update the exit phis in the early exit block.
- VPBuilder MiddleBuilder(MiddleVPBB);
- VPBuilder EarlyExitB(VectorEarlyExitVPBB);
- for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
- auto *ExitIRI = cast<VPIRPhi>(&R);
- // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
- // a single predecessor and 1 if it has two.
- unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
- if (ExitIRI->getNumOperands() != 1) {
- // The first of two operands corresponds to the latch exit, via MiddleVPBB
- // predecessor. Extract its final lane.
- ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
+ SmallVector<EarlyExitInfo> Exits;
+ for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
+ for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
+ if (Pred == MiddleVPBB)
+ continue;
+ // Collect condition for this early exit.
+ auto *EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
+ VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
+ assert(match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond()) &&
+ "Terminator must be BranchOnCond");
+ VPValue *CondOfEarlyExitingVPBB =
+ EarlyExitingVPBB->getTerminator()->getOperand(0);
+ auto *CondToEarlyExit = TrueSucc == EB
+ ? CondOfEarlyExitingVPBB
+ : Builder.createNot(CondOfEarlyExitingVPBB);
+ Exits.push_back({
+ EarlyExitingVPBB,
+ EB,
+ CondToEarlyExit,
+ });
}
+ }
+
+ // Sort exits by dominance to get the correct program order.
+ VPDominatorTree VPDT(Plan);
+ llvm::sort(Exits, [&VPDT](const EarlyExitInfo &A, const EarlyExitInfo &B) {
+ return VPDT.dominates(A.EarlyExitingVPBB, B.EarlyExitingVPBB);
+ });
- VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
- if (!isa<VPIRValue>(IncomingFromEarlyExit)) {
- // Update the incoming value from the early exit.
- VPValue *FirstActiveLane = EarlyExitB.createNaryOp(
- VPInstruction::FirstActiveLane, {CondToEarlyExit},
- DebugLoc::getUnknown(), "first.active.lane");
- IncomingFromEarlyExit = EarlyExitB.createNaryOp(
- VPInstruction::ExtractLane, {FirstActiveLane, IncomingFromEarlyExit},
- DebugLoc::getUnknown(), "early.exit.value");
- ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
+ // Build the AnyOf condition for the latch terminator. For multiple exits,
+ // also create an exit dispatch block to determine which exit to take.
+ VPValue *Combined = Exits[0].CondToExit;
+ for (const auto &Exit : drop_begin(Exits))
+ Combined = Builder.createOr(Combined, Exit.CondToExit);
+ VPValue *IsAnyExitTaken =
+ Builder.createNaryOp(VPInstruction::AnyOf, {Combined});
+
+ VPSymbolicValue FirstActiveLane;
+ // Process exits in reverse order so phi operands are added in the order
+ // matching the original program order (last exit's operand added first
+ // becomes last). The vector is reversed afterwards to restore forward order
+ // for the dispatch logic.
+ SmallVector<VPBasicBlock *> VectorEarlyExitVPBBs;
+ for (const auto &[EarlyExitingVPBB, EarlyExitVPBB, CondToExit] :
+ reverse(Exits)) {
+ VPBasicBlock *VectorEarlyExitVPBB =
+ Plan.createVPBasicBlock("vector.early.exit");
+ VectorEarlyExitVPBB->setParent(EarlyExitVPBB->getParent());
+ VectorEarlyExitVPBBs.push_back(VectorEarlyExitVPBB);
+
+ for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
+ auto *ExitIRI = cast<VPIRPhi>(&R);
+ VPValue *IncomingVal =
+ ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
+
+ // Compute the incoming value for this early exit.
+ VPValue *NewIncoming = IncomingVal;
+ if (!isa<VPIRValue>(IncomingVal)) {
+ VPBuilder EarlyExitB(VectorEarlyExitVPBB);
+ NewIncoming = EarlyExitB.createNaryOp(
+ VPInstruction::ExtractLane, {&FirstActiveLane, IncomingVal},
+ DebugLoc::getUnknown(), "early.exit.value");
+ }
+ ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
+ // Add the new incoming value for this early exit.
+ ExitIRI->addOperand(NewIncoming);
}
+
+ EarlyExitingVPBB->getTerminator()->eraseFromParent();
+ VPBlockUtils::disconnectBlocks(EarlyExitingVPBB, EarlyExitVPBB);
+ VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
}
+ VectorEarlyExitVPBBs = to_vector(llvm::reverse(VectorEarlyExitVPBBs));
- // Replace the conditional branch controlling the latch exit from the vector
- // loop with a multi-conditional branch exiting to vector early exit if the
- // early exit has been taken, exiting to middle block if the original
- // condition of the vector latch is true, otherwise continuing back to header.
+ // For exit blocks that also have the middle block as predecessor (latch
+ // exit to the same block as an early exit), extract the last lane of the
+ // first operand for the middle block's incoming value.
+ VPBuilder MiddleBuilder(MiddleVPBB);
+ for (VPRecipeBase &R :
+ cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->phis()) {
+ auto *ExitIRI = cast<VPIRPhi>(&R);
+ if (ExitIRI->getNumOperands() == 1)
+ continue;
+ ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
+ }
+
+ if (Exits.size() != 1) {
+ VPBasicBlock *DispatchBB = Plan.createVPBasicBlock("vector.early.exit");
+ DispatchBB->setParent(VectorEarlyExitVPBBs[0]->getParent());
+ // In the dispatch block, compute the first active lane across all
+ // conditions and chain through exits.
+ VPBuilder DispatchBuilder(DispatchBB);
+ // Chain through exits: for each exit, check if its condition is true at the
+ // first active lane. If so, take that exit. Otherwise, try the next exit.
+ VPBasicBlock *CurrentBB = DispatchBB;
+ for (auto [I, Exit] : enumerate(ArrayRef(Exits).drop_back())) {
+ VPValue *LaneVal = DispatchBuilder.createNaryOp(
+ VPInstruction::ExtractLane, {&FirstActiveLane, Exit.CondToExit},
+ DebugLoc::getUnknown(), "exit.cond.at.lane");
+
+ // For the last dispatch, branch directly to the last exit on false;
+ // otherwise, create a new check block.
+ bool IsLastDispatch = (I + 2 == Exits.size());
+ VPBasicBlock *FalseBB =
+ IsLastDispatch ? VectorEarlyExitVPBBs.back()
+ : Plan.createVPBasicBlock("vector.early.exit.check");
+ if (!IsLastDispatch)
+ FalseBB->setParent(LatchVPBB->getParent());
+
+ DispatchBuilder.createNaryOp(VPInstruction::BranchOnCond, {LaneVal});
+ CurrentBB->setSuccessors({VectorEarlyExitVPBBs[I], FalseBB});
+ VectorEarlyExitVPBBs[I]->setPredecessors({CurrentBB});
+ FalseBB->setPredecessors({CurrentBB});
+
+ if (!IsLastDispatch) {
+ CurrentBB = FalseBB;
+ DispatchBuilder.setInsertPoint(CurrentBB);
+ }
+ }
+ VectorEarlyExitVPBBs[0] = DispatchBB;
+ }
+
+ VPBuilder DispatchBuilder(VectorEarlyExitVPBBs[0],
+ VectorEarlyExitVPBBs[0]->begin());
+ VPValue *FirstLane =
+ DispatchBuilder.createNaryOp(VPInstruction::FirstActiveLane, {Combined},
+ DebugLoc::getUnknown(), "first.active.lane");
+ FirstActiveLane.replaceAllUsesWith(FirstLane);
+
+ // Replace the latch terminator with the new branching logic.
auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount &&
"Unexpected terminator");
@@ -4022,13 +4094,12 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
LatchExitingBranch->eraseFromParent();
-
Builder.setInsertPoint(LatchVPBB);
Builder.createNaryOp(VPInstruction::BranchOnTwoConds,
- {IsEarlyExitTaken, IsLatchExitTaken}, LatchDL);
+ {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
LatchVPBB->clearSuccessors();
- LatchVPBB->setSuccessors({VectorEarlyExitVPBB, MiddleVPBB, HeaderVPBB});
- VectorEarlyExitVPBB->setPredecessors({LatchVPBB});
+ LatchVPBB->setSuccessors({VectorEarlyExitVPBBs[0], MiddleVPBB, HeaderVPBB});
+ VectorEarlyExitVPBBs[0]->setPredecessors({LatchVPBB});
}
/// This function tries convert extended in-loop reductions to
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index e0d09a099647a..5dc58f4bd56cc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -292,14 +292,13 @@ struct VPlanTransforms {
/// Remove dead recipes from \p Plan.
static void removeDeadRecipes(VPlan &Plan);
- /// Update \p Plan to account for the uncountable early exit from \p
- /// EarlyExitingVPBB to \p EarlyExitVPBB by introducing a BranchOnTwoConds
- /// terminator in the latch that handles the early exit and the latch exit
- /// condition.
- static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
- VPBasicBlock *EarlyExitVPBB,
- VPlan &Plan, VPBasicBlock *HeaderVPBB,
- VPBasicBlock *LatchVPBB);
+ /// Update \p Plan to account for uncountable early exits by introducing
+ /// appropriate branching logic in the latch that handles early exits and the
+ /// latch exit condition. Multiple exits are handled with a dispatch block
+ /// that determines which exit to take based on lane-by-lane semantics.
+ static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB,
+ VPBasicBlock *LatchVPBB,
+ VPBasicBlock *MiddleVPBB);
/// Replace loop regions with explicit CFG.
static void dissolveLoopRegions(VPlan &Plan);
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
index 6c35417bd4492..b4a2b3152d42d 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
@@ -346,12 +346,10 @@ loop.end:
}
-; Multiple uncountable early exits pass legality but are not yet supported
-; in VPlan transformations.
+; Multiple uncountable early exits are now supported.
define i64 @multiple_uncountable_exits() {
; CHECK-LABEL: LV: Checking a loop in 'multiple_uncountable_exits'
; CHECK: LV: We can vectorize this loop!
-; CHECK: LV: Not vectorizing: Auto-vectorization of loops with multiple uncountable early exits is not yet supported.
entry:
%p1 = alloca [1024 x i8]
%p2 = alloca [1024 x i8]
diff --git a/llvm/test/Transforms/LoopVectorize/multiple-early-exits.ll b/llvm/test/Transforms/LoopVectorize/multiple-early-exits.ll
index 75cdfe2b9cdd5..9422bf4dc70bc 100644
--- a/llvm/test/Transforms/LoopVectorize/multiple-early-exits.ll
+++ b/llvm/test/Transforms/LoopVectorize/multiple-early-exits.ll
@@ -7,29 +7,60 @@ declare void @init_mem(ptr, i64)
define i64 @two_early_exits_same_exit() {
; CHECK-LABEL: define i64 @two_early_exits_same_exit() {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT1:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]]
-; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_A]], align 1
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 42)
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[INDEX_NEXT1]] = add nuw i64 [[IV]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i1> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT1]], 64
+; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
+; CHECK: [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT]]:
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER1:.*]]
+; CHECK: [[LOOP_HEADER1]]:
+; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 64, %[[SCALAR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[GEP_A1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV1]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_A1]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LD1]], 42
-; CHECK-NEXT: br i1 [[CMP1]], label %[[EXIT:.*]], label %[[EARLY_EXIT_0:.*]]
+; CHECK-NEXT: br i1 [[CMP1]], label %[[EXIT]], label %[[EARLY_EXIT_0:.*]]
; CHECK: [[EARLY_EXIT_0]]:
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]]
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV1]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[IV1]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER1]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV]], %[[LOOP_HEADER]] ], [ 100, %[[EARLY_EXIT_0]] ], [ 43, %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER1]] ], [ 100, %[[EARLY_EXIT_0]] ], [ 43, %[[LOOP_LATCH]] ], [ 100, %[[VECTOR_EARLY_EXIT2]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT3]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -64,32 +95,63 @@ exit:
define i64 @two_early_exits_different_exits() {
; CHECK-LABEL: define i64 @two_early_exits_different_exits() {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT1:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]]
-; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_A]], align 1
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i8> [[WIDE_LOAD]], splat (i8 34)
+; CHECK-NEXT: [[INDEX_NEXT1]] = add nuw i64 [[IV]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i1> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT1]], 64
+; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
+; CHECK: [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT]]:
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: br label %[[EXIT2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT1:.*]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER1:.*]]
+; CHECK: [[LOOP_HEADER1]]:
+; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 64, %[[SCALAR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[GEP_A1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV1]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_A1]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV1]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
-; CHECK-NEXT: br i1 [[CMP1]], label %[[EXIT1:.*]], label %[[EARLY_EXIT_0:.*]]
+; CHECK-NEXT: br i1 [[CMP1]], label %[[EXIT1]], label %[[EARLY_EXIT_0:.*]]
; CHECK: [[EARLY_EXIT_0]]:
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i8 [[LD1]], 34
-; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT2:.*]], label %[[LOOP_LATCH]]
+; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT2]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[IV1]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER]], label %[[EXIT:.*]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER1]], label %[[EXIT:.*]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT1]]:
-; CHECK-NEXT: [[RET2:%.*]] = phi i64 [ [[IV]], %[[LOOP_HEADER]] ]
+; CHECK-NEXT: [[RET2:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER1]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT3]] ]
; CHECK-NEXT: ret i64 [[RET2]]
; CHECK: [[EXIT2]]:
-; CHECK-NEXT: [[RET3:%.*]] = phi i64 [ 100, %[[EARLY_EXIT_0]] ]
+; CHECK-NEXT: [[RET3:%.*]] = phi i64 [ 100, %[[EARLY_EXIT_0]] ], [ 100, %[[VECTOR_EARLY_EXIT2]] ]
; CHECK-NEXT: ret i64 [[RET3]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i64 43
@@ -200,32 +262,52 @@ exit:
define i64 @three_early_exits_same_exit() {
; CHECK-LABEL: define i64 @three_early_exits_same_exit() {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 3, %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[LD_A:%.*]] = load i8, ptr [[TMP0]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i8 [[LD_A]], 42
-; CHECK-NEXT: br i1 [[TMP8]], label %[[EXIT:.*]], label %[[EARLY_EXIT_0:.*]]
+; CHECK-NEXT: br label %[[EARLY_EXIT_0:.*]]
; CHECK: [[EARLY_EXIT_0]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_1:.*]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 42)
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[LD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
-; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[LD_A]], [[LD_B]]
-; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT]], label %[[EARLY_EXIT_1:.*]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt <4 x i8> [[WIDE_LOAD1]], splat (i8 100)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]]
+; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
+; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT:.*]], label %[[EARLY_EXIT_1]]
; CHECK: [[EARLY_EXIT_1]]:
-; CHECK-NEXT: [[CMP3:%.*]] = icmp ugt i8 [[LD_B]], 100
-; CHECK-NEXT: br i1 [[CMP3]], label %[[EXIT]], label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 67
-; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[EARLY_EXIT_0]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT1:.*]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[OFFSET_IDX]], %[[LOOP_HEADER]] ], [ 100, %[[EARLY_EXIT_0]] ], [ 200, %[[EARLY_EXIT_1]] ], [ 43, %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false)
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP1]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP10]], label %[[VECTOR_EARLY_EXIT4:.*]], label %[[LOOP_LATCH:.*]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP3]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP11]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[VECTOR_EARLY_EXIT4]]:
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 3, [[TMP12]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[EXIT1]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP13]], %[[VECTOR_EARLY_EXIT4]] ], [ 100, %[[VECTOR_EARLY_EXIT3]] ], [ 200, %[[VECTOR_EARLY_EXIT2]] ], [ 43, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -264,7 +346,7 @@ exit:
define i64 @four_early_exits_same_exit() {
; CHECK-LABEL: define i64 @four_early_exits_same_exit() {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P3:%.*]] = alloca [1024 x i8], align 1
@@ -273,30 +355,54 @@ define i64 @four_early_exits_same_exit() {
; CHECK-NEXT: call void @init_mem(ptr [[P3]], i64 1024)
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 3, %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[LD_A:%.*]] = load i8, ptr [[TMP0]], align 1
-; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i8 [[LD_A]], 42
-; CHECK-NEXT: br i1 [[TMP11]], label %[[EXIT:.*]], label %[[EARLY_EXIT_0:.*]]
+; CHECK-NEXT: br label %[[EARLY_EXIT_0:.*]]
; CHECK: [[EARLY_EXIT_0]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_1:.*]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 42)
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[LD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
-; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[LD_A]], [[LD_B]]
-; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT]], label %[[EARLY_EXIT_1:.*]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P3]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD1]], [[WIDE_LOAD2]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt <4 x i8> [[WIDE_LOAD2]], splat (i8 100)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP7:%.*]] = or <4 x i1> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i1> [[TMP7]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i1> [[TMP8]], [[TMP6]]
+; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP9]]
+; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
+; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT:.*]], label %[[EARLY_EXIT_1]]
; CHECK: [[EARLY_EXIT_1]]:
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr [[P3]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[LD_C:%.*]] = load i8, ptr [[GEP_C]], align 1
-; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD_B]], [[LD_C]]
-; CHECK-NEXT: br i1 [[CMP3]], label %[[EXIT]], label %[[EARLY_EXIT_2:.*]]
+; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[EARLY_EXIT_0]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT1:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP9]], i1 false)
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP1]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP13]], label %[[VECTOR_EARLY_EXIT7:.*]], label %[[EARLY_EXIT_2:.*]]
; CHECK: [[EARLY_EXIT_2]]:
-; CHECK-NEXT: [[CMP4:%.*]] = icmp ugt i8 [[LD_C]], 100
-; CHECK-NEXT: br i1 [[CMP4]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP14]], label %[[VECTOR_EARLY_EXIT6:.*]], label %[[LOOP_LATCH:.*]]
; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 67
-; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[OFFSET_IDX]], %[[LOOP_HEADER]] ], [ 100, %[[EARLY_EXIT_0]] ], [ 200, %[[EARLY_EXIT_1]] ], [ 300, %[[EARLY_EXIT_2]] ], [ 43, %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP15]], label %[[VECTOR_EARLY_EXIT5:.*]], label %[[VECTOR_EARLY_EXIT4:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT4]]:
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[VECTOR_EARLY_EXIT5]]:
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[VECTOR_EARLY_EXIT6]]:
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[VECTOR_EARLY_EXIT7]]:
+; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP17:%.*]] = add i64 3, [[TMP16]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[EXIT1]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP17]], %[[VECTOR_EARLY_EXIT7]] ], [ 100, %[[VECTOR_EARLY_EXIT6]] ], [ 200, %[[VECTOR_EARLY_EXIT5]] ], [ 300, %[[VECTOR_EARLY_EXIT4]] ], [ 43, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -343,31 +449,50 @@ exit:
define i64 @two_early_exits_with_live_out_values() {
; CHECK-LABEL: define i64 @two_early_exits_with_live_out_values() {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 3, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_0:.*]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[LD_A:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[LD_B:%.*]] = load i8, ptr [[TMP1]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[LD_A]], [[LD_B]]
-; CHECK-NEXT: br i1 [[TMP7]], label %[[EXIT:.*]], label %[[EARLY_EXIT_0:.*]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <4 x i8> [[TMP3]], splat (i8 34)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
+; CHECK-NEXT: br i1 [[TMP7]], label %[[EXIT:.*]], label %[[EARLY_EXIT_0]]
; CHECK: [[EARLY_EXIT_0]]:
-; CHECK-NEXT: [[SUM:%.*]] = add i8 [[LD_A]], [[LD_B]]
-; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i8 [[SUM]], 34
-; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[LOOP_LATCH:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 67
-; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK-NEXT: br label %[[EXIT1:.*]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[OFFSET_IDX]], %[[LOOP_HEADER]] ], [ [[OFFSET_IDX]], %[[EARLY_EXIT_0]] ], [ 99, %[[LOOP_LATCH]] ]
-; CHECK-NEXT: [[RETVAL2:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ [[SUM]], %[[EARLY_EXIT_0]] ], [ 0, %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false)
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i8> [[TMP3]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 3, [[TMP11]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 3, [[TMP13]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[EXIT1]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP14]], %[[VECTOR_EARLY_EXIT3]] ], [ [[TMP12]], %[[VECTOR_EARLY_EXIT2]] ], [ 99, %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[RETVAL2:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT3]] ], [ [[TMP10]], %[[VECTOR_EARLY_EXIT2]] ], [ 0, %[[LOOP_LATCH]] ]
; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[RETVAL2]] to i64
; CHECK-NEXT: [[RET:%.*]] = add i64 [[RETVAL]], [[EXT]]
; CHECK-NEXT: ret i64 [[RET]]
@@ -408,29 +533,62 @@ exit:
define i64 @two_early_exits_negated_condition() {
; CHECK-LABEL: define i64 @two_early_exits_negated_condition() {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 3, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
+; CHECK-NEXT: [[IV:%.*]] = add i64 3, [[INDEX]]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]]
-; CHECK-NEXT: [[LD_A:%.*]] = load i8, ptr [[GEP_A]], align 1
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i8> [[WIDE_LOAD]], splat (i8 34)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i1> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 124
+; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
+; CHECK: [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT]]:
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 3, [[TMP9]]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER1:.*]]
+; CHECK: [[LOOP_HEADER1]]:
+; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 127, %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[GEP_A1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV1]]
+; CHECK-NEXT: [[LD_A:%.*]] = load i8, ptr [[GEP_A1]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV1]]
; CHECK-NEXT: [[LD_B:%.*]] = load i8, ptr [[TMP1]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i8 [[LD_A]], [[LD_B]]
-; CHECK-NEXT: br i1 [[CMP1]], label %[[EARLY_EXIT_0:.*]], label %[[EXIT:.*]]
+; CHECK-NEXT: br i1 [[CMP1]], label %[[EARLY_EXIT_0:.*]], label %[[EXIT]]
; CHECK: [[EARLY_EXIT_0]]:
; CHECK-NEXT: [[CMP2:%.*]] = icmp uge i8 [[LD_A]], 34
; CHECK-NEXT: br i1 [[CMP2]], label %[[LOOP_LATCH]], label %[[EXIT]]
; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV1]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 128
-; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER1]], label %[[EXIT]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV]], %[[LOOP_HEADER]] ], [ 100, %[[EARLY_EXIT_0]] ], [ 43, %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER1]] ], [ 100, %[[EARLY_EXIT_0]] ], [ 43, %[[LOOP_LATCH]] ], [ 100, %[[VECTOR_EARLY_EXIT2]] ], [ [[TMP10]], %[[VECTOR_EARLY_EXIT3]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -466,40 +624,57 @@ exit:
; Three early exits to three different exit blocks.
define i64 @three_early_exits_three_exit_blocks() {
; CHECK-LABEL: define i64 @three_early_exits_three_exit_blocks() {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 3, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_0:.*]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[LD_A:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[LD_B:%.*]] = load i8, ptr [[TMP1]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i8 [[LD_A]], [[LD_B]]
-; CHECK-NEXT: br i1 [[TMP8]], label %[[EXIT1:.*]], label %[[EARLY_EXIT_0:.*]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i8> [[WIDE_LOAD]], splat (i8 34)
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt <4 x i8> [[WIDE_LOAD1]], splat (i8 100)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
+; CHECK-NEXT: br i1 [[TMP8]], label %[[EXIT1:.*]], label %[[EARLY_EXIT_0]]
; CHECK: [[EARLY_EXIT_0]]:
-; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i8 [[LD_A]], 34
-; CHECK-NEXT: br i1 [[TMP10]], label %[[EXIT2:.*]], label %[[EARLY_EXIT_1:.*]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[EARLY_EXIT_1:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: [[EARLY_EXIT_1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i8 [[LD_B]], 100
-; CHECK-NEXT: br i1 [[TMP11]], label %[[EXIT3:.*]], label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 99
-; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT:.*]]
+; CHECK-NEXT: br label %[[EXIT4:.*]]
; CHECK: [[EXIT1]]:
-; CHECK-NEXT: [[RET4:%.*]] = phi i64 [ [[OFFSET_IDX]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: ret i64 [[RET4]]
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false)
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP2]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP11]], label %[[EXIT3:.*]], label %[[LOOP_LATCH:.*]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP3]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP13]], label %[[EXIT2:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: br label %[[EXIT5:.*]]
; CHECK: [[EXIT2]]:
-; CHECK-NEXT: [[RET2:%.*]] = phi i64 [ 100, %[[EARLY_EXIT_0]] ]
-; CHECK-NEXT: ret i64 [[RET2]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT3]]:
-; CHECK-NEXT: [[RET3:%.*]] = phi i64 [ 200, %[[EARLY_EXIT_1]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[RET3:%.*]] = add i64 3, [[TMP12]]
+; CHECK-NEXT: br label %[[EXIT6:.*]]
+; CHECK: [[EXIT6]]:
; CHECK-NEXT: ret i64 [[RET3]]
; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i64 100
+; CHECK: [[EXIT5]]:
+; CHECK-NEXT: ret i64 200
+; CHECK: [[EXIT4]]:
; CHECK-NEXT: ret i64 43
;
entry:
@@ -573,7 +748,7 @@ define i64 @two_early_exits_iv_live_out() {
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
; CHECK: [[VECTOR_BODY_INTERIM]]:
-; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
@@ -616,32 +791,49 @@ exit:
define i64 @three_early_exits_iv_and_load_live_out() {
; CHECK-LABEL: define i64 @three_early_exits_iv_and_load_live_out() {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_1:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD_A:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD_B:%.*]] = load i8, ptr [[TMP1]], align 1
-; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LD_A]], [[LD_B]]
-; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i8 [[LD_A]], 34
-; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[CMP1]], [[CMP2]]
-; CHECK-NEXT: br i1 [[TMP8]], label %[[EXIT:.*]], label %[[EARLY_EXIT_1:.*]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i8> [[WIDE_LOAD]], splat (i8 34)
+; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i1> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt <4 x i8> [[WIDE_LOAD1]], splat (i8 100)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[TMP8]], label %[[EXIT:.*]], label %[[EARLY_EXIT_1]]
; CHECK: [[EARLY_EXIT_1]]:
-; CHECK-NEXT: [[CMP3:%.*]] = icmp ugt i8 [[LD_B]], 100
-; CHECK-NEXT: br i1 [[CMP3]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[LOOP_LATCH:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[INDEX]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 128
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; CHECK-NEXT: br label %[[EXIT1:.*]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[INDEX]], %[[LOOP_HEADER]] ], [ 128, %[[LOOP_LATCH]] ], [ [[INDEX]], %[[EARLY_EXIT_1]] ]
-; CHECK-NEXT: [[RETVAL_LD:%.*]] = phi i8 [ [[LD_A]], %[[LOOP_HEADER]] ], [ 0, %[[LOOP_LATCH]] ], [ [[LD_B]], %[[EARLY_EXIT_1]] ]
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false)
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP10]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i8> [[WIDE_LOAD1]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i8> [[WIDE_LOAD]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[EXIT1]]:
+; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[TMP14]], %[[VECTOR_EARLY_EXIT3]] ], [ 128, %[[LOOP_LATCH]] ], [ [[TMP12]], %[[VECTOR_EARLY_EXIT2]] ]
+; CHECK-NEXT: [[RETVAL_LD:%.*]] = phi i8 [ [[TMP13]], %[[VECTOR_EARLY_EXIT3]] ], [ 0, %[[LOOP_LATCH]] ], [ [[TMP11]], %[[VECTOR_EARLY_EXIT2]] ]
; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[RETVAL_LD]] to i64
; CHECK-NEXT: [[RET:%.*]] = add i64 [[RETVAL_IV]], [[EXT]]
; CHECK-NEXT: ret i64 [[RET]]
@@ -763,30 +955,45 @@ exit.latch:
; Two early exits to same block, IV live-out with different incoming values.
define i64 @two_early_exits_iv_diff_incoming() {
; CHECK-LABEL: define i64 @two_early_exits_iv_diff_incoming() {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_0:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD_A:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD_B:%.*]] = load i8, ptr [[TMP1]], align 1
-; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i8 [[LD_A]], [[LD_B]]
-; CHECK-NEXT: br i1 [[TMP14]], label %[[EXIT:.*]], label %[[EARLY_EXIT_0:.*]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i8> [[WIDE_LOAD]], splat (i8 34)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i1> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
+; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[TMP14]], label %[[EXIT:.*]], label %[[EARLY_EXIT_0]]
; CHECK: [[EARLY_EXIT_0]]:
-; CHECK-NEXT: [[IV_PLUS1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i8 [[LD_A]], 34
-; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[LOOP_LATCH:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[INDEX]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 128
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; CHECK-NEXT: br label %[[EXIT1:.*]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP_HEADER]] ], [ [[IV_PLUS1]], %[[EARLY_EXIT_0]] ], [ 200, %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 1
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[EXIT1]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP11]], %[[VECTOR_EARLY_EXIT3]] ], [ [[TMP10]], %[[VECTOR_EARLY_EXIT2]] ], [ 200, %[[LOOP_LATCH]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -822,35 +1029,58 @@ exit:
define { i64, i64 } @three_early_exits_multiple_live_outs() {
; CHECK-LABEL: define { i64, i64 } @three_early_exits_multiple_live_outs() {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_0:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD_A:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD_B:%.*]] = load i8, ptr [[TMP1]], align 1
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i8 [[LD_A]], [[LD_B]]
-; CHECK-NEXT: br i1 [[TMP10]], label %[[EXIT:.*]], label %[[EARLY_EXIT_0:.*]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <4 x i8> [[TMP3]], splat (i8 34)
+; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt <4 x i8> [[TMP5]], splat (i8 100)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP7:%.*]] = or <4 x i1> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i1> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = freeze <4 x i1> [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP9]])
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[TMP10]], label %[[EXIT:.*]], label %[[EARLY_EXIT_0]]
; CHECK: [[EARLY_EXIT_0]]:
-; CHECK-NEXT: [[SUM:%.*]] = add i8 [[LD_A]], [[LD_B]]
-; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i8 [[SUM]], 34
-; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT]], label %[[EARLY_EXIT_1:.*]]
+; CHECK-NEXT: br i1 [[TMP11]], label %[[EARLY_EXIT_1:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: [[EARLY_EXIT_1]]:
-; CHECK-NEXT: [[DIFF:%.*]] = sub i8 [[LD_A]], [[LD_B]]
-; CHECK-NEXT: [[CMP3:%.*]] = icmp ugt i8 [[DIFF]], 100
-; CHECK-NEXT: br i1 [[CMP3]], label %[[EXIT]], label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[INDEX]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 128
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; CHECK-NEXT: br label %[[EXIT1:.*]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[INDEX]], %[[LOOP_HEADER]] ], [ [[INDEX]], %[[EARLY_EXIT_0]] ], [ [[INDEX]], %[[EARLY_EXIT_1]] ], [ 128, %[[LOOP_LATCH]] ]
-; CHECK-NEXT: [[RETVAL_VAL:%.*]] = phi i8 [ [[LD_A]], %[[LOOP_HEADER]] ], [ [[SUM]], %[[EARLY_EXIT_0]] ], [ [[DIFF]], %[[EARLY_EXIT_1]] ], [ 0, %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 false)
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP12]], label %[[VECTOR_EARLY_EXIT4:.*]], label %[[LOOP_LATCH:.*]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP13]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i8> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i8> [[TMP3]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[VECTOR_EARLY_EXIT4]]:
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i8> [[WIDE_LOAD]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[EXIT1]]:
+; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[TMP19]], %[[VECTOR_EARLY_EXIT4]] ], [ [[TMP17]], %[[VECTOR_EARLY_EXIT3]] ], [ [[TMP15]], %[[VECTOR_EARLY_EXIT2]] ], [ 128, %[[EARLY_EXIT_1]] ]
+; CHECK-NEXT: [[RETVAL_VAL:%.*]] = phi i8 [ [[TMP18]], %[[VECTOR_EARLY_EXIT4]] ], [ [[TMP16]], %[[VECTOR_EARLY_EXIT3]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT2]] ], [ 0, %[[EARLY_EXIT_1]] ]
; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[RETVAL_VAL]] to i64
; CHECK-NEXT: [[R1:%.*]] = insertvalue { i64, i64 } undef, i64 [[RETVAL_IV]], 0
; CHECK-NEXT: [[R2:%.*]] = insertvalue { i64, i64 } [[R1]], i64 [[EXT]], 1
@@ -899,29 +1129,44 @@ exit:
; Two early exits with second load only executed conditionally (after first exit check).
define i64 @two_early_exits_load_in_early_exit_block() {
; CHECK-LABEL: define i64 @two_early_exits_load_in_early_exit_block() {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]]
-; CHECK-NEXT: [[LD_A:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i8 [[LD_A]], 42
-; CHECK-NEXT: br i1 [[TMP6]], label %[[EXIT:.*]], label %[[EARLY_EXIT_0:.*]]
+; CHECK-NEXT: br label %[[EARLY_EXIT_0:.*]]
; CHECK: [[EARLY_EXIT_0]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 42)
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]]
-; CHECK-NEXT: [[LD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
-; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[LD_A]], [[LD_B]]
-; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i1> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
+; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 128
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[EARLY_EXIT_0]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT1:.*]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV]], %[[LOOP_HEADER]] ], [ [[IV]], %[[EARLY_EXIT_0]] ], [ 128, %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[EXIT1]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP10]], %[[VECTOR_EARLY_EXIT3]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT2]] ], [ 128, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -956,7 +1201,7 @@ exit:
define i64 @three_early_exits_loads_in_different_blocks() {
; CHECK-LABEL: define i64 @three_early_exits_loads_in_different_blocks() {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P3:%.*]] = alloca [1024 x i8], align 1
@@ -965,27 +1210,47 @@ define i64 @three_early_exits_loads_in_different_blocks() {
; CHECK-NEXT: call void @init_mem(ptr [[P3]], i64 1024)
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]]
-; CHECK-NEXT: [[LD_A:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i8 [[LD_A]], 42
-; CHECK-NEXT: br i1 [[TMP9]], label %[[EXIT:.*]], label %[[EARLY_EXIT_0:.*]]
+; CHECK-NEXT: br label %[[EARLY_EXIT_0:.*]]
; CHECK: [[EARLY_EXIT_0]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_1:.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 42)
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]]
-; CHECK-NEXT: [[LD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
-; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[LD_A]], [[LD_B]]
-; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT]], label %[[EARLY_EXIT_1:.*]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P3]], i64 [[IV]]
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD1]], [[WIDE_LOAD2]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP7:%.*]] = or <4 x i1> [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]]
+; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT:.*]], label %[[EARLY_EXIT_1]]
; CHECK: [[EARLY_EXIT_1]]:
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr [[P3]], i64 [[IV]]
-; CHECK-NEXT: [[LD_C:%.*]] = load i8, ptr [[GEP_C]], align 1
-; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD_B]], [[LD_C]]
-; CHECK-NEXT: br i1 [[CMP3]], label %[[EXIT]], label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 128
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[EARLY_EXIT_0]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT1:.*]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV]], %[[LOOP_HEADER]] ], [ [[IV]], %[[EARLY_EXIT_0]] ], [ [[IV]], %[[EARLY_EXIT_1]] ], [ 128, %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP7]], i1 false)
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP1]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP11]], label %[[VECTOR_EARLY_EXIT5:.*]], label %[[LOOP_LATCH:.*]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP3]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP12]], label %[[VECTOR_EARLY_EXIT4:.*]], label %[[VECTOR_EARLY_EXIT3:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[VECTOR_EARLY_EXIT4]]:
+; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[VECTOR_EARLY_EXIT5]]:
+; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[EXIT1]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP15]], %[[VECTOR_EARLY_EXIT5]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT4]] ], [ [[TMP13]], %[[VECTOR_EARLY_EXIT3]] ], [ 128, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -1028,7 +1293,7 @@ exit:
define { i64, i8 } @four_early_exits_with_conditional_loads() {
; CHECK-LABEL: define { i64, i8 } @four_early_exits_with_conditional_loads() {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P3:%.*]] = alloca [1024 x i8], align 1
@@ -1039,33 +1304,62 @@ define { i64, i8 } @four_early_exits_with_conditional_loads() {
; CHECK-NEXT: call void @init_mem(ptr [[P4]], i64 1024)
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]]
-; CHECK-NEXT: [[LD_A:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
-; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i8 [[LD_A]], 10
-; CHECK-NEXT: br i1 [[TMP12]], label %[[EXIT:.*]], label %[[EARLY_EXIT_0:.*]]
+; CHECK-NEXT: br label %[[EARLY_EXIT_0:.*]]
; CHECK: [[EARLY_EXIT_0]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_1:.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 10)
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]]
-; CHECK-NEXT: [[LD_B:%.*]] = load i8, ptr [[GEP_B]], align 1
-; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[LD_B]], 20
-; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT]], label %[[EARLY_EXIT_1:.*]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD1]], splat (i8 20)
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P3]], i64 [[IV]]
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD2]], splat (i8 30)
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[P4]], i64 [[IV]]
+; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD3]], splat (i8 40)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i1> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i1> [[TMP8]], [[TMP5]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP9]], [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP10]]
+; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT:.*]], label %[[EARLY_EXIT_1]]
; CHECK: [[EARLY_EXIT_1]]:
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr [[P3]], i64 [[IV]]
-; CHECK-NEXT: [[LD_C:%.*]] = load i8, ptr [[GEP_C]], align 1
-; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD_C]], 30
-; CHECK-NEXT: br i1 [[CMP3]], label %[[EXIT]], label %[[EARLY_EXIT_2:.*]]
+; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[EARLY_EXIT_0]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT1:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP10]], i1 false)
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP1]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP14]], label %[[VECTOR_EARLY_EXIT8:.*]], label %[[EARLY_EXIT_2:.*]]
; CHECK: [[EARLY_EXIT_2]]:
-; CHECK-NEXT: [[GEP_D:%.*]] = getelementptr inbounds i8, ptr [[P4]], i64 [[IV]]
-; CHECK-NEXT: [[LD_D:%.*]] = load i8, ptr [[GEP_D]], align 1
-; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i8 [[LD_D]], 40
-; CHECK-NEXT: br i1 [[CMP4]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP3]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP15]], label %[[VECTOR_EARLY_EXIT7:.*]], label %[[LOOP_LATCH:.*]]
; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 128
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[IV]], %[[LOOP_HEADER]] ], [ [[IV]], %[[EARLY_EXIT_0]] ], [ [[IV]], %[[EARLY_EXIT_1]] ], [ [[IV]], %[[EARLY_EXIT_2]] ], [ 128, %[[LOOP_LATCH]] ]
-; CHECK-NEXT: [[RETVAL_VAL:%.*]] = phi i8 [ [[LD_A]], %[[LOOP_HEADER]] ], [ [[LD_B]], %[[EARLY_EXIT_0]] ], [ [[LD_C]], %[[EARLY_EXIT_1]] ], [ [[LD_D]], %[[EARLY_EXIT_2]] ], [ 0, %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br i1 [[TMP16]], label %[[VECTOR_EARLY_EXIT6:.*]], label %[[VECTOR_EARLY_EXIT5:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT5]]:
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i8> [[WIDE_LOAD3]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[VECTOR_EARLY_EXIT6]]:
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i8> [[WIDE_LOAD2]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[VECTOR_EARLY_EXIT7]]:
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i8> [[WIDE_LOAD1]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[VECTOR_EARLY_EXIT8]]:
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i8> [[WIDE_LOAD]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label %[[EXIT1]]
+; CHECK: [[EXIT1]]:
+; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[TMP24]], %[[VECTOR_EARLY_EXIT8]] ], [ [[TMP22]], %[[VECTOR_EARLY_EXIT7]] ], [ [[TMP20]], %[[VECTOR_EARLY_EXIT6]] ], [ [[TMP18]], %[[VECTOR_EARLY_EXIT5]] ], [ 128, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL_VAL:%.*]] = phi i8 [ [[TMP23]], %[[VECTOR_EARLY_EXIT8]] ], [ [[TMP21]], %[[VECTOR_EARLY_EXIT7]] ], [ [[TMP19]], %[[VECTOR_EARLY_EXIT6]] ], [ [[TMP17]], %[[VECTOR_EARLY_EXIT5]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[R1:%.*]] = insertvalue { i64, i8 } undef, i64 [[RETVAL_IV]], 0
; CHECK-NEXT: [[R2:%.*]] = insertvalue { i64, i8 } [[R1]], i8 [[RETVAL_VAL]], 1
; CHECK-NEXT: ret { i64, i8 } [[R2]]
diff --git a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll
index be23acd443229..ac07867c29c7a 100644
--- a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll
+++ b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll
@@ -241,7 +241,64 @@ exit:
}
define i64 @two_early_exits_same_exit_with_constant_live_outs() {
-; CHECK: LV: Not vectorizing: Auto-vectorization of loops with multiple uncountable early exits is not yet supported.
+; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
+; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
+; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
+; CHECK-NEXT: Live-in ir<67> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: IR %A = alloca [1024 x i8], align 1
+; CHECK-NEXT: IR %B = alloca [1024 x i8], align 1
+; CHECK-NEXT: IR call void @init(ptr %A, i64 1024)
+; CHECK-NEXT: IR call void @init(ptr %B, i64 1024)
+; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT: <x1> vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
+; CHECK-NEXT: CLONE ir<%gep.A> = getelementptr inbounds ir<%A>, vp<[[SCALAR_STEPS]]>
+; CHECK-NEXT: vp<[[PTRA:%.+]]> = vector-pointer inbounds ir<%gep.A>
+; CHECK-NEXT: WIDEN ir<%ld.A> = load vp<[[PTRA]]>
+; CHECK-NEXT: WIDEN ir<%cmp1> = icmp eq ir<%ld.A>, ir<42>
+; CHECK-NEXT: CLONE ir<%gep.B> = getelementptr inbounds ir<%B>, vp<[[SCALAR_STEPS]]>
+; CHECK-NEXT: vp<[[PTRB:%.+]]> = vector-pointer inbounds ir<%gep.B>
+; CHECK-NEXT: WIDEN ir<%ld.B> = load vp<[[PTRB]]>
+; CHECK-NEXT: WIDEN ir<%cmp2> = icmp eq ir<%ld.A>, ir<%ld.B>
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[OR:%.+]]> = or ir<%cmp1>, ir<%cmp2>
+; CHECK-NEXT: EMIT vp<[[ANY_OF:%.+]]> = any-of vp<[[OR]]>
+; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]>
+; CHECK-NEXT: EMIT branch-on-two-conds vp<[[ANY_OF]]>, vp<[[CMP]]>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): vector.early.exit, middle.block
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<67>, vp<[[VTC]]>
+; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.early.exit:
+; CHECK-NEXT: EMIT vp<%first.active.lane> = first-active-lane vp<[[OR]]>
+; CHECK-NEXT: EMIT vp<%exit.cond.at.lane> = extract-lane vp<%first.active.lane>, ir<%cmp1>
+; CHECK-NEXT: EMIT branch-on-cond vp<%exit.cond.at.lane>
+; CHECK-NEXT: Successor(s): vector.early.exit, vector.early.exit
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.early.exit:
+; CHECK-NEXT: Successor(s): ir-bb<exit>
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.early.exit:
+; CHECK-NEXT: EMIT vp<[[FIRST_ACTIVE:%.+]]> = first-active-lane vp<[[OR]]>
+; CHECK-NEXT: EMIT vp<[[FINAL_IV:%.+]]> = add vp<[[CAN_IV]]>, vp<[[FIRST_ACTIVE]]>
+; CHECK-NEXT: Successor(s): ir-bb<exit>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: IR %retval = phi i64 [ %iv, %loop.header ], [ 100, %early.exit.0 ], [ 43, %loop.latch ] (extra operands: ir<43> from middle.block, ir<100> from vector.early.exit, vp<[[FINAL_IV]]> from vector.early.exit)
;
entry:
%A = alloca [1024 x i8]
diff --git a/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll b/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll
index 80fbb6e7a49ca..baa0898a6f053 100644
--- a/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll
@@ -57,65 +57,6 @@ loop.end:
}
-; We don't currently support multiple early exits.
-define i64 @multiple_uncountable_exits() {
-; CHECK-LABEL: define i64 @multiple_uncountable_exits() {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
-; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
-; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
-; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
-; CHECK-NEXT: br label [[SEARCH1:%.*]]
-; CHECK: search1:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
-; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
-; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP_END:%.*]], label [[SEARCH2:%.*]]
-; CHECK: search2:
-; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i8 [[LD1]], 34
-; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_END]], label [[LOOP_INC]]
-; CHECK: loop.inc:
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[SEARCH1]], label [[LOOP_END]]
-; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[SEARCH1]] ], [ 100, [[SEARCH2]] ], [ 43, [[LOOP_INC]] ]
-; CHECK-NEXT: ret i64 [[RETVAL]]
-;
-entry:
- %p1 = alloca [1024 x i8]
- %p2 = alloca [1024 x i8]
- call void @init_mem(ptr %p1, i64 1024)
- call void @init_mem(ptr %p2, i64 1024)
- br label %search1
-
-search1:
- %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
- %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
- %ld1 = load i8, ptr %arrayidx, align 1
- %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
- %ld2 = load i8, ptr %arrayidx1, align 1
- %cmp1 = icmp eq i8 %ld1, %ld2
- br i1 %cmp1, label %loop.end, label %search2
-
-search2:
- %cmp2 = icmp ult i8 %ld1, 34
- br i1 %cmp2, label %loop.end, label %loop.inc
-
-loop.inc:
- %index.next = add i64 %index, 1
- %exitcond = icmp ne i64 %index.next, 67
- br i1 %exitcond, label %search1, label %loop.end
-
-loop.end:
- %retval = phi i64 [ %index, %search1 ], [ 100, %search2 ], [ 43, %loop.inc ]
- ret i64 %retval
-}
-
-
define i64 @uncountable_exit_infinite_loop() {
; CHECK-LABEL: define i64 @uncountable_exit_infinite_loop() {
; CHECK-NEXT: entry:
@@ -179,7 +120,7 @@ define i64 @loop_contains_unsafe_call() {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[BAD_CALL:%.*]] = call i32 @foo(i32 [[LD1]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT: [[BAD_CALL:%.*]] = call i32 @foo(i32 [[LD1]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[BAD_CALL]], 34
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
; CHECK: loop.inc:
>From cc05088e21043ea5b53ebf7a9f1a9d9dd3a17c7b Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 21 Jan 2026 15:46:39 +0000
Subject: [PATCH 02/11] !fixup unique names for created blocks, fix comment
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 17 ++++++++++-------
.../uncountable-early-exit-vplan.ll | 12 ++++++------
2 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index dec9cbb465026..307bdddf5af56 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3998,11 +3998,12 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
// becomes last). The vector is reversed afterwards to restore forward order
// for the dispatch logic.
SmallVector<VPBasicBlock *> VectorEarlyExitVPBBs;
- for (const auto &[EarlyExitingVPBB, EarlyExitVPBB, CondToExit] :
- reverse(Exits)) {
+ for (auto [I, Exit] : enumerate(reverse(Exits))) {
+ auto &[EarlyExitingVPBB, EarlyExitVPBB, CondToExit] = Exit;
+ unsigned Idx = Exits.size() - 1 - I;
+ Twine BlockSuffix = Exits.size() == 1 ? "" : Twine(".") + Twine(Idx);
VPBasicBlock *VectorEarlyExitVPBB =
- Plan.createVPBasicBlock("vector.early.exit");
- VectorEarlyExitVPBB->setParent(EarlyExitVPBB->getParent());
+ Plan.createVPBasicBlock("vector.early.exit" + BlockSuffix);
VectorEarlyExitVPBBs.push_back(VectorEarlyExitVPBB);
for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
@@ -4030,7 +4031,7 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
VectorEarlyExitVPBBs = to_vector(llvm::reverse(VectorEarlyExitVPBBs));
// For exit blocks that also have the middle block as predecessor (latch
- // exit to the same block as an early exit), extract the last lane of the
+ // exits to the same block as an early exit), extract the last lane of the
// first operand for the middle block's incoming value.
VPBuilder MiddleBuilder(MiddleVPBB);
for (VPRecipeBase &R :
@@ -4042,7 +4043,8 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
}
if (Exits.size() != 1) {
- VPBasicBlock *DispatchBB = Plan.createVPBasicBlock("vector.early.exit");
+ VPBasicBlock *DispatchBB =
+ Plan.createVPBasicBlock("vector.early.exit.check");
DispatchBB->setParent(VectorEarlyExitVPBBs[0]->getParent());
// In the dispatch block, compute the first active lane across all
// conditions and chain through exits.
@@ -4060,7 +4062,8 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
bool IsLastDispatch = (I + 2 == Exits.size());
VPBasicBlock *FalseBB =
IsLastDispatch ? VectorEarlyExitVPBBs.back()
- : Plan.createVPBasicBlock("vector.early.exit.check");
+ : Plan.createVPBasicBlock(
+ Twine("vector.early.exit.check.") + Twine(I));
if (!IsLastDispatch)
FalseBB->setParent(LatchVPBB->getParent());
diff --git a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll
index ac07867c29c7a..04bea947f4b3b 100644
--- a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll
+++ b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll
@@ -276,29 +276,29 @@ define i64 @two_early_exits_same_exit_with_constant_live_outs() {
; CHECK-NEXT: EMIT branch-on-two-conds vp<[[ANY_OF]]>, vp<[[CMP]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): vector.early.exit, middle.block
+; CHECK-NEXT: Successor(s): vector.early.exit.check, middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<67>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
; CHECK-EMPTY:
-; CHECK-NEXT: vector.early.exit:
+; CHECK-NEXT: vector.early.exit.check:
; CHECK-NEXT: EMIT vp<%first.active.lane> = first-active-lane vp<[[OR]]>
; CHECK-NEXT: EMIT vp<%exit.cond.at.lane> = extract-lane vp<%first.active.lane>, ir<%cmp1>
; CHECK-NEXT: EMIT branch-on-cond vp<%exit.cond.at.lane>
-; CHECK-NEXT: Successor(s): vector.early.exit, vector.early.exit
+; CHECK-NEXT: Successor(s): vector.early.exit.0, vector.early.exit.1
; CHECK-EMPTY:
-; CHECK-NEXT: vector.early.exit:
+; CHECK-NEXT: vector.early.exit.1:
; CHECK-NEXT: Successor(s): ir-bb<exit>
; CHECK-EMPTY:
-; CHECK-NEXT: vector.early.exit:
+; CHECK-NEXT: vector.early.exit.0:
; CHECK-NEXT: EMIT vp<[[FIRST_ACTIVE:%.+]]> = first-active-lane vp<[[OR]]>
; CHECK-NEXT: EMIT vp<[[FINAL_IV:%.+]]> = add vp<[[CAN_IV]]>, vp<[[FIRST_ACTIVE]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
-; CHECK-NEXT: IR %retval = phi i64 [ %iv, %loop.header ], [ 100, %early.exit.0 ], [ 43, %loop.latch ] (extra operands: ir<43> from middle.block, ir<100> from vector.early.exit, vp<[[FINAL_IV]]> from vector.early.exit)
+; CHECK-NEXT: IR %retval = phi i64 [ %iv, %loop.header ], [ 100, %early.exit.0 ], [ 43, %loop.latch ] (extra operands: ir<43> from middle.block, ir<100> from vector.early.exit.1, vp<[[FINAL_IV]]> from vector.early.exit.0)
;
entry:
%A = alloca [1024 x i8]
>From ac29adbfd035d1f3ad3e5d947d9993de0d66b82d Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 29 Jan 2026 22:07:14 +0000
Subject: [PATCH 03/11] !fixup address comments, thanks
---
.../Vectorize/VPlanConstruction.cpp | 15 ++--
.../LoopVectorize/unsupported_early_exit.ll | 70 ++++++++++++++++++-
2 files changed, 76 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index a82e8ac1f9d55..974fb9df2f366 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -873,13 +873,17 @@ void VPlanTransforms::handleEarlyExits(VPlan &Plan,
auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);
+ if (HasUncountableEarlyExit) {
+ handleUncountableEarlyExits(Plan, cast<VPBasicBlock>(HeaderVPB), LatchVPBB,
+ MiddleVPBB);
+ return;
+ }
+
// Disconnect countable early exits from the loop, leaving it with a single
// exit from the latch. Countable early exits are left for a scalar epilog.
- // When there are uncountable early exits, skip this loop entirely - they are
- // handled separately in handleUncountableEarlyExits.
for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
- if (Pred == MiddleVPBB || HasUncountableEarlyExit)
+ if (Pred == MiddleVPBB)
continue;
// Remove phi operands for the early exiting block.
@@ -890,11 +894,6 @@ void VPlanTransforms::handleEarlyExits(VPlan &Plan,
VPBlockUtils::disconnectBlocks(Pred, EB);
}
}
-
- if (HasUncountableEarlyExit) {
- handleUncountableEarlyExits(Plan, cast<VPBasicBlock>(HeaderVPB), LatchVPBB,
- MiddleVPBB);
- }
}
void VPlanTransforms::addMiddleCheck(VPlan &Plan,
diff --git a/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll b/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll
index baa0898a6f053..614c98c6b8016 100644
--- a/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll
@@ -120,7 +120,7 @@ define i64 @loop_contains_unsafe_call() {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[BAD_CALL:%.*]] = call i32 @foo(i32 [[LD1]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT: [[BAD_CALL:%.*]] = call i32 @foo(i32 [[LD1]]) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[BAD_CALL]], 34
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
; CHECK: loop.inc:
@@ -650,6 +650,74 @@ loop.end:
ret i64 %retval
}
+; Two early exits with a load (not known to be dereferenceable) in a non-exiting middle block between them. The load is only executed if the first early exit is not taken, so it would need predication. This loop should not be vectorized.
+define i64 @multi_exit_load_in_nonexiting_block(ptr %src) {
+; CHECK-LABEL: define i64 @multi_exit_load_in_nonexiting_block(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
+; CHECK: loop.header:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP1]], align 1
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LD1]], 42
+; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD_OUT:%.*]] = load i64, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: br label [[EARLY_EXIT_2:%.*]]
+; CHECK: early.exit.2:
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP2]], align 1
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_END]], label [[LOOP_LATCH]]
+; CHECK: loop.latch:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_HEADER]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP_HEADER]] ], [ [[LD_OUT]], [[EARLY_EXIT_2]] ], [ 67, [[LOOP_LATCH]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop.header
+
+loop.header:
+ %index = phi i64 [ %index.next, %loop.latch ], [ 0, %entry ]
+ %gep1 = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %gep1, align 1
+ %cmp1 = icmp eq i8 %ld1, 42
+ br i1 %cmp1, label %loop.end, label %middle.block
+
+middle.block:
+ %gep.src = getelementptr inbounds i64, ptr %src, i64 %index
+ %ld.out = load i64, ptr %gep.src
+ br label %early.exit.2
+
+early.exit.2:
+ ; Second early exit block
+ %gep2 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %gep2, align 1
+ %cmp2 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp2, label %loop.end, label %loop.latch
+
+loop.latch:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop.header, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop.header ], [ %ld.out, %early.exit.2 ], [ 67, %loop.latch ]
+ ret i64 %retval
+}
declare i32 @foo(i32) readonly
declare <vscale x 4 x i32> @foo_vec(<vscale x 4 x i32>)
>From ce012e937694dd43c1cfc55114ace2efc2325fb1 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 30 Jan 2026 15:40:37 +0000
Subject: [PATCH 04/11] !fixup re-generate checks to match block
names.
---
.../LoopVectorize/multiple-early-exits.ll | 480 +++++++++---------
1 file changed, 240 insertions(+), 240 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/multiple-early-exits.ll b/llvm/test/Transforms/LoopVectorize/multiple-early-exits.ll
index 9422bf4dc70bc..0d86e6dc9abed 100644
--- a/llvm/test/Transforms/LoopVectorize/multiple-early-exits.ll
+++ b/llvm/test/Transforms/LoopVectorize/multiple-early-exits.ll
@@ -12,11 +12,11 @@ define i64 @two_early_exits_same_exit() {
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT1:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT1:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 42)
@@ -28,23 +28,23 @@ define i64 @two_early_exits_same_exit() {
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT1]], 64
-; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[VECTOR_BODY_INTERIM]]
; CHECK: [[VECTOR_BODY_INTERIM]]:
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT]]:
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER1:.*]]
-; CHECK: [[LOOP_HEADER1]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 64, %[[SCALAR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[GEP_A1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV1]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_A1]], align 1
@@ -58,9 +58,9 @@ define i64 @two_early_exits_same_exit() {
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[IV1]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER1]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER1]] ], [ 100, %[[EARLY_EXIT_0]] ], [ 43, %[[LOOP_LATCH]] ], [ 100, %[[VECTOR_EARLY_EXIT2]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT3]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 100, %[[EARLY_EXIT_0]] ], [ 43, %[[LOOP_LATCH]] ], [ 100, %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT_0]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -100,11 +100,11 @@ define i64 @two_early_exits_different_exits() {
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT1:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT1:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]]
@@ -116,23 +116,23 @@ define i64 @two_early_exits_different_exits() {
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT1]], 64
-; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[VECTOR_BODY_INTERIM]]
; CHECK: [[VECTOR_BODY_INTERIM]]:
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT]]:
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: br label %[[EXIT2:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[EXIT1:.*]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER1:.*]]
-; CHECK: [[LOOP_HEADER1]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 64, %[[SCALAR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[GEP_A1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV1]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_A1]], align 1
@@ -146,12 +146,12 @@ define i64 @two_early_exits_different_exits() {
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[IV1]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER1]], label %[[EXIT:.*]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER]], label %[[EXIT:.*]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT1]]:
-; CHECK-NEXT: [[RET2:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER1]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT3]] ]
+; CHECK-NEXT: [[RET2:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT_0]] ]
; CHECK-NEXT: ret i64 [[RET2]]
; CHECK: [[EXIT2]]:
-; CHECK-NEXT: [[RET3:%.*]] = phi i64 [ 100, %[[EARLY_EXIT_0]] ], [ 100, %[[VECTOR_EARLY_EXIT2]] ]
+; CHECK-NEXT: [[RET3:%.*]] = phi i64 [ 100, %[[EARLY_EXIT_0]] ], [ 100, %[[VECTOR_EARLY_EXIT_1]] ]
; CHECK-NEXT: ret i64 [[RET3]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i64 43
@@ -267,11 +267,11 @@ define i64 @three_early_exits_same_exit() {
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: br label %[[EARLY_EXIT_0:.*]]
-; CHECK: [[EARLY_EXIT_0]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_1:.*]] ]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
@@ -286,28 +286,28 @@ define i64 @three_early_exits_same_exit() {
; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]]
; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
-; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT:.*]], label %[[EARLY_EXIT_1]]
-; CHECK: [[EARLY_EXIT_1]]:
-; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[EARLY_EXIT_0]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP2]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[VECTOR_BODY_INTERIM]]
+; CHECK: [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT1:.*]]
-; CHECK: [[EXIT]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false)
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP1]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP10]], label %[[VECTOR_EARLY_EXIT4:.*]], label %[[LOOP_LATCH:.*]]
-; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: br i1 [[TMP10]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_CHECK_0:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK_0]]:
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP3]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP11]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT2]]:
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[VECTOR_EARLY_EXIT3]]:
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[VECTOR_EARLY_EXIT4]]:
+; CHECK-NEXT: br i1 [[TMP11]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_2]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[VECTOR_EARLY_EXIT_1]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[TMP13:%.*]] = add i64 3, [[TMP12]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[EXIT1]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP13]], %[[VECTOR_EARLY_EXIT4]] ], [ 100, %[[VECTOR_EARLY_EXIT3]] ], [ 200, %[[VECTOR_EARLY_EXIT2]] ], [ 43, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP13]], %[[VECTOR_EARLY_EXIT_0]] ], [ 100, %[[VECTOR_EARLY_EXIT_1]] ], [ 200, %[[VECTOR_EARLY_EXIT_2]] ], [ 43, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -353,11 +353,11 @@ define i64 @four_early_exits_same_exit() {
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P3]], i64 1024)
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: br label %[[EARLY_EXIT_0:.*]]
-; CHECK: [[EARLY_EXIT_0]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_1:.*]] ]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
@@ -376,33 +376,33 @@ define i64 @four_early_exits_same_exit() {
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP9]]
; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
-; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT:.*]], label %[[EARLY_EXIT_1]]
-; CHECK: [[EARLY_EXIT_1]]:
-; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[EARLY_EXIT_0]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP2]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[VECTOR_BODY_INTERIM]]
+; CHECK: [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT1:.*]]
-; CHECK: [[EXIT]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP9]], i1 false)
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP1]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP13]], label %[[VECTOR_EARLY_EXIT7:.*]], label %[[EARLY_EXIT_2:.*]]
-; CHECK: [[EARLY_EXIT_2]]:
+; CHECK-NEXT: br i1 [[TMP13]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_CHECK_0:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK_0]]:
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP14]], label %[[VECTOR_EARLY_EXIT6:.*]], label %[[LOOP_LATCH:.*]]
-; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: br i1 [[TMP14]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[VECTOR_EARLY_EXIT_CHECK_1:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK_1]]:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP15]], label %[[VECTOR_EARLY_EXIT5:.*]], label %[[VECTOR_EARLY_EXIT4:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT4]]:
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[VECTOR_EARLY_EXIT5]]:
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[VECTOR_EARLY_EXIT6]]:
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[VECTOR_EARLY_EXIT7]]:
+; CHECK-NEXT: br i1 [[TMP15]], label %[[VECTOR_EARLY_EXIT_2:.*]], label %[[VECTOR_EARLY_EXIT_3:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_3]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[VECTOR_EARLY_EXIT_2]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[VECTOR_EARLY_EXIT_1]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[TMP17:%.*]] = add i64 3, [[TMP16]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[EXIT1]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP17]], %[[VECTOR_EARLY_EXIT7]] ], [ 100, %[[VECTOR_EARLY_EXIT6]] ], [ 200, %[[VECTOR_EARLY_EXIT5]] ], [ 300, %[[VECTOR_EARLY_EXIT4]] ], [ 43, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP17]], %[[VECTOR_EARLY_EXIT_0]] ], [ 100, %[[VECTOR_EARLY_EXIT_1]] ], [ 200, %[[VECTOR_EARLY_EXIT_2]] ], [ 300, %[[VECTOR_EARLY_EXIT_3]] ], [ 43, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -454,11 +454,11 @@ define i64 @two_early_exits_with_live_out_values() {
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_0:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
@@ -472,27 +472,27 @@ define i64 @two_early_exits_with_live_out_values() {
; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
-; CHECK-NEXT: br i1 [[TMP7]], label %[[EXIT:.*]], label %[[EARLY_EXIT_0]]
-; CHECK: [[EARLY_EXIT_0]]:
-; CHECK-NEXT: br i1 [[TMP8]], label %[[LOOP_LATCH:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: br label %[[EXIT1:.*]]
-; CHECK: [[EXIT]]:
+; CHECK-NEXT: br i1 [[TMP7]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[VECTOR_BODY_INTERIM]]
+; CHECK: [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false)
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP9]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: br i1 [[TMP9]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i8> [[TMP3]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[TMP12:%.*]] = add i64 3, [[TMP11]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[TMP14:%.*]] = add i64 3, [[TMP13]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[EXIT1]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP14]], %[[VECTOR_EARLY_EXIT3]] ], [ [[TMP12]], %[[VECTOR_EARLY_EXIT2]] ], [ 99, %[[LOOP_LATCH]] ]
-; CHECK-NEXT: [[RETVAL2:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT3]] ], [ [[TMP10]], %[[VECTOR_EARLY_EXIT2]] ], [ 0, %[[LOOP_LATCH]] ]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP14]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP12]], %[[VECTOR_EARLY_EXIT_1]] ], [ 99, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL2:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP10]], %[[VECTOR_EARLY_EXIT_1]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[RETVAL2]] to i64
; CHECK-NEXT: [[RET:%.*]] = add i64 [[RETVAL]], [[EXT]]
; CHECK-NEXT: ret i64 [[RET]]
@@ -538,11 +538,11 @@ define i64 @two_early_exits_negated_condition() {
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[IV:%.*]] = add i64 3, [[INDEX]]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1
@@ -555,24 +555,24 @@ define i64 @two_early_exits_negated_condition() {
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 124
-; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[VECTOR_BODY_INTERIM]]
; CHECK: [[VECTOR_BODY_INTERIM]]:
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT]]:
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[TMP10:%.*]] = add i64 3, [[TMP9]]
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER1:.*]]
-; CHECK: [[LOOP_HEADER1]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 127, %[[SCALAR_PH]] ]
; CHECK-NEXT: [[GEP_A1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV1]]
; CHECK-NEXT: [[LD_A:%.*]] = load i8, ptr [[GEP_A1]], align 1
@@ -586,9 +586,9 @@ define i64 @two_early_exits_negated_condition() {
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV1]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 128
-; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER1]], label %[[EXIT]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER1]] ], [ 100, %[[EARLY_EXIT_0]] ], [ 43, %[[LOOP_LATCH]] ], [ 100, %[[VECTOR_EARLY_EXIT2]] ], [ [[TMP10]], %[[VECTOR_EARLY_EXIT3]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 100, %[[EARLY_EXIT_0]] ], [ 43, %[[LOOP_LATCH]] ], [ 100, %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP10]], %[[VECTOR_EARLY_EXIT_0]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -629,11 +629,11 @@ define i64 @three_early_exits_three_exit_blocks() {
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_0:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
@@ -648,33 +648,33 @@ define i64 @three_early_exits_three_exit_blocks() {
; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
-; CHECK-NEXT: br i1 [[TMP8]], label %[[EXIT1:.*]], label %[[EARLY_EXIT_0]]
-; CHECK: [[EARLY_EXIT_0]]:
-; CHECK-NEXT: br i1 [[TMP9]], label %[[EARLY_EXIT_1:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
-; CHECK: [[EARLY_EXIT_1]]:
-; CHECK-NEXT: br label %[[EXIT4:.*]]
-; CHECK: [[EXIT1]]:
+; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[VECTOR_BODY_INTERIM]]
+; CHECK: [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false)
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP2]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP11]], label %[[EXIT3:.*]], label %[[LOOP_LATCH:.*]]
-; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: br i1 [[TMP11]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_CHECK_0:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK_0]]:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP3]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP13]], label %[[EXIT2:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT2]]:
-; CHECK-NEXT: br label %[[EXIT5:.*]]
-; CHECK: [[EXIT2]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[EXIT3]]:
+; CHECK-NEXT: br i1 [[TMP13]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_2]]:
+; CHECK-NEXT: br label %[[EXIT3:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_1]]:
+; CHECK-NEXT: br label %[[EXIT2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[RET3:%.*]] = add i64 3, [[TMP12]]
-; CHECK-NEXT: br label %[[EXIT6:.*]]
-; CHECK: [[EXIT6]]:
+; CHECK-NEXT: br label %[[EXIT1:.*]]
+; CHECK: [[EXIT1]]:
; CHECK-NEXT: ret i64 [[RET3]]
-; CHECK: [[EXIT]]:
+; CHECK: [[EXIT2]]:
; CHECK-NEXT: ret i64 100
-; CHECK: [[EXIT5]]:
+; CHECK: [[EXIT3]]:
; CHECK-NEXT: ret i64 200
-; CHECK: [[EXIT4]]:
+; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i64 43
;
entry:
@@ -796,11 +796,11 @@ define i64 @three_early_exits_iv_and_load_live_out() {
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_1:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
@@ -814,26 +814,26 @@ define i64 @three_early_exits_iv_and_load_live_out() {
; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
-; CHECK-NEXT: br i1 [[TMP8]], label %[[EXIT:.*]], label %[[EARLY_EXIT_1]]
-; CHECK: [[EARLY_EXIT_1]]:
-; CHECK-NEXT: br i1 [[TMP9]], label %[[LOOP_LATCH:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: br label %[[EXIT1:.*]]
-; CHECK: [[EXIT]]:
+; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[VECTOR_BODY_INTERIM]]
+; CHECK: [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false)
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP10]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: br i1 [[TMP10]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i8> [[WIDE_LOAD1]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i8> [[WIDE_LOAD]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[EXIT1]]:
-; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[TMP14]], %[[VECTOR_EARLY_EXIT3]] ], [ 128, %[[LOOP_LATCH]] ], [ [[TMP12]], %[[VECTOR_EARLY_EXIT2]] ]
-; CHECK-NEXT: [[RETVAL_LD:%.*]] = phi i8 [ [[TMP13]], %[[VECTOR_EARLY_EXIT3]] ], [ 0, %[[LOOP_LATCH]] ], [ [[TMP11]], %[[VECTOR_EARLY_EXIT2]] ]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[TMP14]], %[[VECTOR_EARLY_EXIT_0]] ], [ 128, %[[MIDDLE_BLOCK]] ], [ [[TMP12]], %[[VECTOR_EARLY_EXIT_1]] ]
+; CHECK-NEXT: [[RETVAL_LD:%.*]] = phi i8 [ [[TMP13]], %[[VECTOR_EARLY_EXIT_0]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ [[TMP11]], %[[VECTOR_EARLY_EXIT_1]] ]
; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[RETVAL_LD]] to i64
; CHECK-NEXT: [[RET:%.*]] = add i64 [[RETVAL_IV]], [[EXT]]
; CHECK-NEXT: ret i64 [[RET]]
@@ -960,11 +960,11 @@ define i64 @two_early_exits_iv_diff_incoming() {
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_0:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
@@ -976,24 +976,24 @@ define i64 @two_early_exits_iv_diff_incoming() {
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
-; CHECK-NEXT: br i1 [[TMP14]], label %[[EXIT:.*]], label %[[EARLY_EXIT_0]]
-; CHECK: [[EARLY_EXIT_0]]:
-; CHECK-NEXT: br i1 [[TMP7]], label %[[LOOP_LATCH:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: br label %[[EXIT1:.*]]
-; CHECK: [[EXIT]]:
+; CHECK-NEXT: br i1 [[TMP14]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[VECTOR_BODY_INTERIM]]
+; CHECK: [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 1
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[EXIT1]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP11]], %[[VECTOR_EARLY_EXIT3]] ], [ [[TMP10]], %[[VECTOR_EARLY_EXIT2]] ], [ 200, %[[LOOP_LATCH]] ]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP11]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP10]], %[[VECTOR_EARLY_EXIT_1]] ], [ 200, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -1034,11 +1034,11 @@ define { i64, i64 } @three_early_exits_multiple_live_outs() {
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_0:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
@@ -1054,33 +1054,33 @@ define { i64, i64 } @three_early_exits_multiple_live_outs() {
; CHECK-NEXT: [[TMP9:%.*]] = freeze <4 x i1> [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
-; CHECK-NEXT: br i1 [[TMP10]], label %[[EXIT:.*]], label %[[EARLY_EXIT_0]]
-; CHECK: [[EARLY_EXIT_0]]:
-; CHECK-NEXT: br i1 [[TMP11]], label %[[EARLY_EXIT_1:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
-; CHECK: [[EARLY_EXIT_1]]:
-; CHECK-NEXT: br label %[[EXIT1:.*]]
-; CHECK: [[EXIT]]:
+; CHECK-NEXT: br i1 [[TMP10]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[VECTOR_BODY_INTERIM]]
+; CHECK: [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 false)
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP12]], label %[[VECTOR_EARLY_EXIT4:.*]], label %[[LOOP_LATCH:.*]]
-; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: br i1 [[TMP12]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_CHECK_0:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK_0]]:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP13]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: br i1 [[TMP13]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_2]]:
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i8> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i8> [[TMP3]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[VECTOR_EARLY_EXIT4]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i8> [[WIDE_LOAD]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[EXIT1]]:
-; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[TMP19]], %[[VECTOR_EARLY_EXIT4]] ], [ [[TMP17]], %[[VECTOR_EARLY_EXIT3]] ], [ [[TMP15]], %[[VECTOR_EARLY_EXIT2]] ], [ 128, %[[EARLY_EXIT_1]] ]
-; CHECK-NEXT: [[RETVAL_VAL:%.*]] = phi i8 [ [[TMP18]], %[[VECTOR_EARLY_EXIT4]] ], [ [[TMP16]], %[[VECTOR_EARLY_EXIT3]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT2]] ], [ 0, %[[EARLY_EXIT_1]] ]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[TMP19]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP17]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP15]], %[[VECTOR_EARLY_EXIT_2]] ], [ 128, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL_VAL:%.*]] = phi i8 [ [[TMP18]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP16]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT_2]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[RETVAL_VAL]] to i64
; CHECK-NEXT: [[R1:%.*]] = insertvalue { i64, i64 } undef, i64 [[RETVAL_IV]], 0
; CHECK-NEXT: [[R2:%.*]] = insertvalue { i64, i64 } [[R1]], i64 [[EXT]], 1
@@ -1134,11 +1134,11 @@ define i64 @two_early_exits_load_in_early_exit_block() {
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: br label %[[EARLY_EXIT_0:.*]]
-; CHECK: [[EARLY_EXIT_0]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 42)
@@ -1150,23 +1150,23 @@ define i64 @two_early_exits_load_in_early_exit_block() {
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
-; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[EARLY_EXIT_0]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP2]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[VECTOR_BODY_INTERIM]]
+; CHECK: [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT1:.*]]
-; CHECK: [[EXIT]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT3:.*]], label %[[VECTOR_EARLY_EXIT2:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT2]]:
+; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[EXIT1]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP10]], %[[VECTOR_EARLY_EXIT3]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT2]] ], [ 128, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP10]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT_1]] ], [ 128, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -1208,11 +1208,11 @@ define i64 @three_early_exits_loads_in_different_blocks() {
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P3]], i64 1024)
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: br label %[[EARLY_EXIT_0:.*]]
-; CHECK: [[EARLY_EXIT_0]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_1:.*]] ]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 42)
@@ -1228,29 +1228,29 @@ define i64 @three_early_exits_loads_in_different_blocks() {
; CHECK-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]]
; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
-; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT:.*]], label %[[EARLY_EXIT_1]]
-; CHECK: [[EARLY_EXIT_1]]:
-; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[EARLY_EXIT_0]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP2]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[VECTOR_BODY_INTERIM]]
+; CHECK: [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT1:.*]]
-; CHECK: [[EXIT]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP7]], i1 false)
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP1]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP11]], label %[[VECTOR_EARLY_EXIT5:.*]], label %[[LOOP_LATCH:.*]]
-; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: br i1 [[TMP11]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_CHECK_0:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK_0]]:
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP3]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP12]], label %[[VECTOR_EARLY_EXIT4:.*]], label %[[VECTOR_EARLY_EXIT3:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT3]]:
+; CHECK-NEXT: br i1 [[TMP12]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_2]]:
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[VECTOR_EARLY_EXIT4]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[VECTOR_EARLY_EXIT5]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[EXIT1]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP15]], %[[VECTOR_EARLY_EXIT5]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT4]] ], [ [[TMP13]], %[[VECTOR_EARLY_EXIT3]] ], [ 128, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP15]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP13]], %[[VECTOR_EARLY_EXIT_2]] ], [ 128, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -1302,11 +1302,11 @@ define { i64, i8 } @four_early_exits_with_conditional_loads() {
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P3]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P4]], i64 1024)
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: br label %[[EARLY_EXIT_0:.*]]
-; CHECK: [[EARLY_EXIT_0]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[EARLY_EXIT_1:.*]] ]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 10)
@@ -1326,40 +1326,40 @@ define { i64, i8 } @four_early_exits_with_conditional_loads() {
; CHECK-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP10]]
; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
-; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT:.*]], label %[[EARLY_EXIT_1]]
-; CHECK: [[EARLY_EXIT_1]]:
-; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[EARLY_EXIT_0]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP2]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[VECTOR_BODY_INTERIM]]
+; CHECK: [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT1:.*]]
-; CHECK: [[EXIT]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP10]], i1 false)
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP1]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP14]], label %[[VECTOR_EARLY_EXIT8:.*]], label %[[EARLY_EXIT_2:.*]]
-; CHECK: [[EARLY_EXIT_2]]:
+; CHECK-NEXT: br i1 [[TMP14]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_CHECK_0:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK_0]]:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP3]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP15]], label %[[VECTOR_EARLY_EXIT7:.*]], label %[[LOOP_LATCH:.*]]
-; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: br i1 [[TMP15]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[VECTOR_EARLY_EXIT_CHECK_1:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_CHECK_1]]:
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br i1 [[TMP16]], label %[[VECTOR_EARLY_EXIT6:.*]], label %[[VECTOR_EARLY_EXIT5:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT5]]:
+; CHECK-NEXT: br i1 [[TMP16]], label %[[VECTOR_EARLY_EXIT_2:.*]], label %[[VECTOR_EARLY_EXIT_3:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT_3]]:
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i8> [[WIDE_LOAD3]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[VECTOR_EARLY_EXIT6]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[VECTOR_EARLY_EXIT_2]]:
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i8> [[WIDE_LOAD2]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[VECTOR_EARLY_EXIT7]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i8> [[WIDE_LOAD1]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[VECTOR_EARLY_EXIT8]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i8> [[WIDE_LOAD]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
-; CHECK-NEXT: br label %[[EXIT1]]
-; CHECK: [[EXIT1]]:
-; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[TMP24]], %[[VECTOR_EARLY_EXIT8]] ], [ [[TMP22]], %[[VECTOR_EARLY_EXIT7]] ], [ [[TMP20]], %[[VECTOR_EARLY_EXIT6]] ], [ [[TMP18]], %[[VECTOR_EARLY_EXIT5]] ], [ 128, %[[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[RETVAL_VAL:%.*]] = phi i8 [ [[TMP23]], %[[VECTOR_EARLY_EXIT8]] ], [ [[TMP21]], %[[VECTOR_EARLY_EXIT7]] ], [ [[TMP19]], %[[VECTOR_EARLY_EXIT6]] ], [ [[TMP17]], %[[VECTOR_EARLY_EXIT5]] ], [ 0, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[TMP24]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP22]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP20]], %[[VECTOR_EARLY_EXIT_2]] ], [ [[TMP18]], %[[VECTOR_EARLY_EXIT_3]] ], [ 128, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL_VAL:%.*]] = phi i8 [ [[TMP23]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP21]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP19]], %[[VECTOR_EARLY_EXIT_2]] ], [ [[TMP17]], %[[VECTOR_EARLY_EXIT_3]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[R1:%.*]] = insertvalue { i64, i8 } undef, i64 [[RETVAL_IV]], 0
; CHECK-NEXT: [[R2:%.*]] = insertvalue { i64, i8 } [[R1]], i8 [[RETVAL_VAL]], 1
; CHECK-NEXT: ret { i64, i8 } [[R2]]
>From 4c0b68c4da7553e5358d4af9ee14f6d7e72727e8 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 2 Feb 2026 15:57:27 +0000
Subject: [PATCH 05/11] !fixup address comments, thanks
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 176 ++++++++++++------
.../LoopVectorize/multiple-early-exits.ll | 30 +--
.../uncountable-early-exit-vplan.ll | 2 +-
3 files changed, 133 insertions(+), 75 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 05e492b0ed666..fab7af12f165d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3980,23 +3980,24 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
VPBuilder Builder(LatchVPBB->getTerminator());
SmallVector<EarlyExitInfo> Exits;
- for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
- for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
+ for (VPIRBasicBlock *ExitBlock : Plan.getExitBlocks()) {
+ for (VPBlockBase *Pred : to_vector(ExitBlock->getPredecessors())) {
if (Pred == MiddleVPBB)
continue;
// Collect condition for this early exit.
auto *EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
- assert(match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond()) &&
- "Terminator must be BranchOnCond");
- VPValue *CondOfEarlyExitingVPBB =
- EarlyExitingVPBB->getTerminator()->getOperand(0);
- auto *CondToEarlyExit = TrueSucc == EB
+ VPValue *CondOfEarlyExitingVPBB;
+ [[maybe_unused]] bool Matched =
+ match(EarlyExitingVPBB->getTerminator(),
+ m_BranchOnCond(m_VPValue(CondOfEarlyExitingVPBB)));
+ assert(Matched && "Terminator must be BranchOnCond");
+ auto *CondToEarlyExit = TrueSucc == ExitBlock
? CondOfEarlyExitingVPBB
: Builder.createNot(CondOfEarlyExitingVPBB);
Exits.push_back({
EarlyExitingVPBB,
- EB,
+ ExitBlock,
CondToEarlyExit,
});
}
@@ -4008,43 +4009,90 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
return VPDT.dominates(A.EarlyExitingVPBB, B.EarlyExitingVPBB);
});
- // Build the AnyOf condition for the latch terminator. For multiple exits,
- // also create an exit dispatch block to determine which exit to take.
+ // Build the AnyOf condition for the latch terminator.
VPValue *Combined = Exits[0].CondToExit;
- for (const auto &Exit : drop_begin(Exits))
- Combined = Builder.createOr(Combined, Exit.CondToExit);
+ assert(
+ VPDT.dominates(Combined->getDefiningRecipe()->getParent(), LatchVPBB) &&
+ "All conditions must dominate the latch");
+ for (const auto &[_, _1, CondToExit] : drop_begin(Exits)) {
+ assert(VPDT.dominates(CondToExit->getDefiningRecipe()->getParent(),
+ LatchVPBB) &&
+ "All conditions must dominate the latch");
+ Combined = Builder.createOr(Combined, CondToExit);
+ }
VPValue *IsAnyExitTaken =
Builder.createNaryOp(VPInstruction::AnyOf, {Combined});
- VPSymbolicValue FirstActiveLane;
- // Process exits in reverse order so phi operands are added in the order
- // matching the original program order (last exit's operand added first
- // becomes last). The vector is reversed afterwards to restore forward order
- // for the dispatch logic.
- SmallVector<VPBasicBlock *> VectorEarlyExitVPBBs;
- for (auto [I, Exit] : enumerate(reverse(Exits))) {
- auto &[EarlyExitingVPBB, EarlyExitVPBB, CondToExit] = Exit;
- unsigned Idx = Exits.size() - 1 - I;
+ // Create the vector.early.exit blocks.
+ SmallVector<VPBasicBlock *> VectorEarlyExitVPBBs(Exits.size());
+ for (unsigned Idx = 0; Idx != Exits.size(); ++Idx) {
Twine BlockSuffix = Exits.size() == 1 ? "" : Twine(".") + Twine(Idx);
VPBasicBlock *VectorEarlyExitVPBB =
Plan.createVPBasicBlock("vector.early.exit" + BlockSuffix);
- VectorEarlyExitVPBBs.push_back(VectorEarlyExitVPBB);
+ VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
+ }
+
+ // Create the dispatch block (or reuse the single exit block if only one
+ // exit). The dispatch block computes the first active lane of the combined
+ // condition and, for multiple exits, chains through conditions to determine
+ // which exit to take.
+ VPBasicBlock *DispatchVPBB =
+ Exits.size() == 1 ? VectorEarlyExitVPBBs[0]
+ : Plan.createVPBasicBlock("vector.early.exit.check");
+ VPBuilder DispatchBuilder(DispatchVPBB, DispatchVPBB->begin());
+ VPValue *FirstActiveLane =
+ DispatchBuilder.createNaryOp(VPInstruction::FirstActiveLane, {Combined},
+ DebugLoc::getUnknown(), "first.active.lane");
+ // For each early exit, disconnect the original exiting block
+ // (early.exiting.I) from the exit block (ir-bb<exit.I>) and route through a
+ // new vector.early.exit block. Update ir-bb<exit.I>'s phis to extract their
+ // values at the first active lane:
+ //
+ // Input:
+ // early.exiting.I:
+ // ...
+ // EMIT branch-on-cond vp<%cond.I>
+ // Successor(s): in.loop.succ, ir-bb<exit.I>
+ //
+ // ir-bb<exit.I>:
+ // IR %phi = phi [ vp<%incoming.I>, early.exiting.I ], ...
+ //
+ // Output:
+ // early.exiting.I:
+ // ...
+ // Successor(s): in.loop.succ
+ //
+ // vector.early.exit.I:
+ // EMIT vp<%exit.val> = extract-lane vp<%first.lane>, vp<%incoming.I>
+ // Successor(s): ir-bb<exit.I>
+ //
+ // ir-bb<exit.I>:
+ // IR %phi = phi ... (extra operand: vp<%exit.val> from
+ // vector.early.exit.I)
+ //
+ for (auto [Exit, VectorEarlyExitVPBB] : zip(Exits, VectorEarlyExitVPBBs)) {
+ auto &[EarlyExitingVPBB, EarlyExitVPBB, CondToExit] = Exit;
+ // Adjust the phi nodes in EarlyExitVPBB:
+ // 1. extract the incoming value from EarlyExitingVPBB at FirstActiveLane,
+ // 2. remove the incoming value from EarlyExitingVPBB,
+ // 3. add the extract back as the last operand of the phi.
+ // Then adjust the CFG, removing the edge between EarlyExitingVPBB and
+ // EarlyExitVPBB and adding a new edge between VectorEarlyExitVPBB and
+ // EarlyExitVPBB. The extracts at FirstActiveLane are now the incoming
+ // values from VectorEarlyExitVPBB.
for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
auto *ExitIRI = cast<VPIRPhi>(&R);
VPValue *IncomingVal =
ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
-
- // Compute the incoming value for this early exit.
VPValue *NewIncoming = IncomingVal;
if (!isa<VPIRValue>(IncomingVal)) {
- VPBuilder EarlyExitB(VectorEarlyExitVPBB);
- NewIncoming = EarlyExitB.createNaryOp(
- VPInstruction::ExtractLane, {&FirstActiveLane, IncomingVal},
+ VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
+ NewIncoming = EarlyExitBuilder.createNaryOp(
+ VPInstruction::ExtractLane, {FirstActiveLane, IncomingVal},
DebugLoc::getUnknown(), "early.exit.value");
}
ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
- // Add the new incoming value for this early exit.
ExitIRI->addOperand(NewIncoming);
}
@@ -4052,33 +4100,53 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
VPBlockUtils::disconnectBlocks(EarlyExitingVPBB, EarlyExitVPBB);
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
}
- VectorEarlyExitVPBBs = to_vector(llvm::reverse(VectorEarlyExitVPBBs));
// For exit blocks that also have the middle block as predecessor (latch
// exits to the same block as an early exit), extract the last lane of the
// first operand for the middle block's incoming value.
VPBuilder MiddleBuilder(MiddleVPBB);
- for (VPRecipeBase &R :
- cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->phis()) {
- auto *ExitIRI = cast<VPIRPhi>(&R);
- if (ExitIRI->getNumOperands() == 1)
- continue;
- ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
+ VPBasicBlock *MiddleSuccVPBB =
+ cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0]);
+ if (MiddleSuccVPBB->getNumPredecessors() > 1) {
+ assert(all_of(MiddleSuccVPBB->getPredecessors(),
+ [&](VPBlockBase *Pred) {
+ return Pred == MiddleVPBB ||
+ is_contained(VectorEarlyExitVPBBs, Pred);
+ }) &&
+ "All predecessors must be either the middle block or early exit "
+ "blocks");
+
+ for (VPRecipeBase &R : MiddleSuccVPBB->phis()) {
+ auto *ExitIRI = cast<VPIRPhi>(&R);
+ assert(ExitIRI->getIncomingValueForBlock(MiddleVPBB) ==
+ ExitIRI->getOperand(0) &&
+ "First operand must come from middle block");
+ ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
+ }
}
if (Exits.size() != 1) {
- VPBasicBlock *DispatchBB =
- Plan.createVPBasicBlock("vector.early.exit.check");
- DispatchBB->setParent(VectorEarlyExitVPBBs[0]->getParent());
- // In the dispatch block, compute the first active lane across all
- // conditions and chain through exits.
- VPBuilder DispatchBuilder(DispatchBB);
- // Chain through exits: for each exit, check if its condition is true at the
- // first active lane. If so, take that exit. Otherwise, try the next exit.
- VPBasicBlock *CurrentBB = DispatchBB;
+ // Chain through exits: for each exit, check if its condition is true at
+ // the first active lane. If so, take that exit; otherwise, try the next.
+ // The last exit needs no check since it must be taken if all others fail.
+ //
+ // For 3 exits (cond.0, cond.1, cond.2), where vp<%combined> is the OR of
+ // all three conditions and is computed in the latch, this creates:
+ //
+ // vector.early.exit.check:
+ // EMIT vp<%first.lane> = first-active-lane vp<%combined>
+ // EMIT vp<%at.cond.0> = extract-lane vp<%first.lane>, vp<%cond.0>
+ // EMIT branch-on-cond vp<%at.cond.0>
+ // Successor(s): vector.early.exit.0, vector.early.exit.check.0
+ //
+ // vector.early.exit.check.0:
+ // EMIT vp<%at.cond.1> = extract-lane vp<%first.lane>, vp<%cond.1>
+ // EMIT branch-on-cond vp<%at.cond.1>
+ // Successor(s): vector.early.exit.1, vector.early.exit.2
+ VPBasicBlock *CurrentBB = DispatchVPBB;
for (auto [I, Exit] : enumerate(ArrayRef(Exits).drop_back())) {
VPValue *LaneVal = DispatchBuilder.createNaryOp(
- VPInstruction::ExtractLane, {&FirstActiveLane, Exit.CondToExit},
+ VPInstruction::ExtractLane, {FirstActiveLane, Exit.CondToExit},
DebugLoc::getUnknown(), "exit.cond.at.lane");
// For the last dispatch, branch directly to the last exit on false;
@@ -4096,21 +4164,11 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
VectorEarlyExitVPBBs[I]->setPredecessors({CurrentBB});
FalseBB->setPredecessors({CurrentBB});
- if (!IsLastDispatch) {
- CurrentBB = FalseBB;
- DispatchBuilder.setInsertPoint(CurrentBB);
- }
+ CurrentBB = FalseBB;
+ DispatchBuilder.setInsertPoint(CurrentBB);
}
- VectorEarlyExitVPBBs[0] = DispatchBB;
}
- VPBuilder DispatchBuilder(VectorEarlyExitVPBBs[0],
- VectorEarlyExitVPBBs[0]->begin());
- VPValue *FirstLane =
- DispatchBuilder.createNaryOp(VPInstruction::FirstActiveLane, {Combined},
- DebugLoc::getUnknown(), "first.active.lane");
- FirstActiveLane.replaceAllUsesWith(FirstLane);
-
// Replace the latch terminator with the new branching logic.
auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount &&
@@ -4125,8 +4183,8 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
Builder.createNaryOp(VPInstruction::BranchOnTwoConds,
{IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
LatchVPBB->clearSuccessors();
- LatchVPBB->setSuccessors({VectorEarlyExitVPBBs[0], MiddleVPBB, HeaderVPBB});
- VectorEarlyExitVPBBs[0]->setPredecessors({LatchVPBB});
+ LatchVPBB->setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
+ DispatchVPBB->setPredecessors({LatchVPBB});
}
/// This function tries convert extended in-loop reductions to
diff --git a/llvm/test/Transforms/LoopVectorize/multiple-early-exits.ll b/llvm/test/Transforms/LoopVectorize/multiple-early-exits.ll
index 0d86e6dc9abed..0e11b174eca69 100644
--- a/llvm/test/Transforms/LoopVectorize/multiple-early-exits.ll
+++ b/llvm/test/Transforms/LoopVectorize/multiple-early-exits.ll
@@ -60,7 +60,7 @@ define i64 @two_early_exits_same_exit() {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 100, %[[EARLY_EXIT_0]] ], [ 43, %[[LOOP_LATCH]] ], [ 100, %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT_0]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 100, %[[EARLY_EXIT_0]] ], [ 43, %[[LOOP_LATCH]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT_0]] ], [ 100, %[[VECTOR_EARLY_EXIT_1]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -307,7 +307,7 @@ define i64 @three_early_exits_same_exit() {
; CHECK-NEXT: [[TMP13:%.*]] = add i64 3, [[TMP12]]
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP13]], %[[VECTOR_EARLY_EXIT_0]] ], [ 100, %[[VECTOR_EARLY_EXIT_1]] ], [ 200, %[[VECTOR_EARLY_EXIT_2]] ], [ 43, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 200, %[[VECTOR_EARLY_EXIT_2]] ], [ 100, %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP13]], %[[VECTOR_EARLY_EXIT_0]] ], [ 43, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -402,7 +402,7 @@ define i64 @four_early_exits_same_exit() {
; CHECK-NEXT: [[TMP17:%.*]] = add i64 3, [[TMP16]]
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP17]], %[[VECTOR_EARLY_EXIT_0]] ], [ 100, %[[VECTOR_EARLY_EXIT_1]] ], [ 200, %[[VECTOR_EARLY_EXIT_2]] ], [ 300, %[[VECTOR_EARLY_EXIT_3]] ], [ 43, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 300, %[[VECTOR_EARLY_EXIT_3]] ], [ 200, %[[VECTOR_EARLY_EXIT_2]] ], [ 100, %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP17]], %[[VECTOR_EARLY_EXIT_0]] ], [ 43, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -491,8 +491,8 @@ define i64 @two_early_exits_with_live_out_values() {
; CHECK-NEXT: [[TMP14:%.*]] = add i64 3, [[TMP13]]
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP14]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP12]], %[[VECTOR_EARLY_EXIT_1]] ], [ 99, %[[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[RETVAL2:%.*]] = phi i8 [ 0, %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP10]], %[[VECTOR_EARLY_EXIT_1]] ], [ 0, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP12]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT_0]] ], [ 99, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL2:%.*]] = phi i8 [ [[TMP10]], %[[VECTOR_EARLY_EXIT_1]] ], [ 0, %[[VECTOR_EARLY_EXIT_0]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[RETVAL2]] to i64
; CHECK-NEXT: [[RET:%.*]] = add i64 [[RETVAL]], [[EXT]]
; CHECK-NEXT: ret i64 [[RET]]
@@ -588,7 +588,7 @@ define i64 @two_early_exits_negated_condition() {
; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 128
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 100, %[[EARLY_EXIT_0]] ], [ 43, %[[LOOP_LATCH]] ], [ 100, %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP10]], %[[VECTOR_EARLY_EXIT_0]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 100, %[[EARLY_EXIT_0]] ], [ 43, %[[LOOP_LATCH]] ], [ [[TMP10]], %[[VECTOR_EARLY_EXIT_0]] ], [ 100, %[[VECTOR_EARLY_EXIT_1]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -832,8 +832,8 @@ define i64 @three_early_exits_iv_and_load_live_out() {
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[TMP14]], %[[VECTOR_EARLY_EXIT_0]] ], [ 128, %[[MIDDLE_BLOCK]] ], [ [[TMP12]], %[[VECTOR_EARLY_EXIT_1]] ]
-; CHECK-NEXT: [[RETVAL_LD:%.*]] = phi i8 [ [[TMP13]], %[[VECTOR_EARLY_EXIT_0]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ [[TMP11]], %[[VECTOR_EARLY_EXIT_1]] ]
+; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[TMP12]], %[[VECTOR_EARLY_EXIT_1]] ], [ 128, %[[MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT_0]] ]
+; CHECK-NEXT: [[RETVAL_LD:%.*]] = phi i8 [ [[TMP11]], %[[VECTOR_EARLY_EXIT_1]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ [[TMP13]], %[[VECTOR_EARLY_EXIT_0]] ]
; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[RETVAL_LD]] to i64
; CHECK-NEXT: [[RET:%.*]] = add i64 [[RETVAL_IV]], [[EXT]]
; CHECK-NEXT: ret i64 [[RET]]
@@ -993,7 +993,7 @@ define i64 @two_early_exits_iv_diff_incoming() {
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP11]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP10]], %[[VECTOR_EARLY_EXIT_1]] ], [ 200, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP10]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP11]], %[[VECTOR_EARLY_EXIT_0]] ], [ 200, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -1079,8 +1079,8 @@ define { i64, i64 } @three_early_exits_multiple_live_outs() {
; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[TMP19]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP17]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP15]], %[[VECTOR_EARLY_EXIT_2]] ], [ 128, %[[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[RETVAL_VAL:%.*]] = phi i8 [ [[TMP18]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP16]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT_2]] ], [ 0, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[TMP15]], %[[VECTOR_EARLY_EXIT_2]] ], [ [[TMP17]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP19]], %[[VECTOR_EARLY_EXIT_0]] ], [ 128, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL_VAL:%.*]] = phi i8 [ [[TMP14]], %[[VECTOR_EARLY_EXIT_2]] ], [ [[TMP16]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP18]], %[[VECTOR_EARLY_EXIT_0]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[RETVAL_VAL]] to i64
; CHECK-NEXT: [[R1:%.*]] = insertvalue { i64, i64 } undef, i64 [[RETVAL_IV]], 0
; CHECK-NEXT: [[R2:%.*]] = insertvalue { i64, i64 } [[R1]], i64 [[EXT]], 1
@@ -1166,7 +1166,7 @@ define i64 @two_early_exits_load_in_early_exit_block() {
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP10]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT_1]] ], [ 128, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP9]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP10]], %[[VECTOR_EARLY_EXIT_0]] ], [ 128, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -1250,7 +1250,7 @@ define i64 @three_early_exits_loads_in_different_blocks() {
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP15]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP13]], %[[VECTOR_EARLY_EXIT_2]] ], [ 128, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP13]], %[[VECTOR_EARLY_EXIT_2]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP15]], %[[VECTOR_EARLY_EXIT_0]] ], [ 128, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -1358,8 +1358,8 @@ define { i64, i8 } @four_early_exits_with_conditional_loads() {
; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[TMP24]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP22]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP20]], %[[VECTOR_EARLY_EXIT_2]] ], [ [[TMP18]], %[[VECTOR_EARLY_EXIT_3]] ], [ 128, %[[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[RETVAL_VAL:%.*]] = phi i8 [ [[TMP23]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP21]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP19]], %[[VECTOR_EARLY_EXIT_2]] ], [ [[TMP17]], %[[VECTOR_EARLY_EXIT_3]] ], [ 0, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL_IV:%.*]] = phi i64 [ [[TMP18]], %[[VECTOR_EARLY_EXIT_3]] ], [ [[TMP20]], %[[VECTOR_EARLY_EXIT_2]] ], [ [[TMP22]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP24]], %[[VECTOR_EARLY_EXIT_0]] ], [ 128, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL_VAL:%.*]] = phi i8 [ [[TMP17]], %[[VECTOR_EARLY_EXIT_3]] ], [ [[TMP19]], %[[VECTOR_EARLY_EXIT_2]] ], [ [[TMP21]], %[[VECTOR_EARLY_EXIT_1]] ], [ [[TMP23]], %[[VECTOR_EARLY_EXIT_0]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[R1:%.*]] = insertvalue { i64, i8 } undef, i64 [[RETVAL_IV]], 0
; CHECK-NEXT: [[R2:%.*]] = insertvalue { i64, i8 } [[R1]], i8 [[RETVAL_VAL]], 1
; CHECK-NEXT: ret { i64, i8 } [[R2]]
diff --git a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll
index 04bea947f4b3b..a1b743aa8f793 100644
--- a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll
+++ b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll
@@ -298,7 +298,7 @@ define i64 @two_early_exits_same_exit_with_constant_live_outs() {
; CHECK-NEXT: Successor(s): ir-bb<exit>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>:
-; CHECK-NEXT: IR %retval = phi i64 [ %iv, %loop.header ], [ 100, %early.exit.0 ], [ 43, %loop.latch ] (extra operands: ir<43> from middle.block, ir<100> from vector.early.exit.1, vp<[[FINAL_IV]]> from vector.early.exit.0)
+; CHECK-NEXT: IR %retval = phi i64 [ %iv, %loop.header ], [ 100, %early.exit.0 ], [ 43, %loop.latch ] (extra operands: ir<43> from middle.block, vp<[[FINAL_IV]]> from vector.early.exit.0, ir<100> from vector.early.exit.1)
;
entry:
%A = alloca [1024 x i8]
>From 2a93f45fbc3e32aff78e542908c99fef4bd5e0e3 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 9 Feb 2026 14:41:54 +0000
Subject: [PATCH 06/11] !fixup address latest comments, thanks!
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 34 ++++++++-----------
1 file changed, 15 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 4dcd3dd114fcc..b2fa81846bae6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4001,6 +4001,16 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
VPValue *CondToExit;
};
+ // Helper to check if the block defining a VPValue properly dominates the
+ // latch. Live-in values (with no defining recipe) dominate everything.
+ VPDominatorTree VPDT(Plan);
+ [[maybe_unused]] auto DominatesLatch = [&VPDT, LatchVPBB](VPValue *V) {
+ VPRecipeBase *DefRecipe = V->getDefiningRecipe();
+ if (!DefRecipe)
+ return true;
+ return VPDT.properlyDominates(DefRecipe->getParent(), LatchVPBB);
+ };
+
VPBuilder Builder(LatchVPBB->getTerminator());
SmallVector<EarlyExitInfo> Exits;
for (VPIRBasicBlock *ExitBlock : Plan.getExitBlocks()) {
@@ -4018,6 +4028,8 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
auto *CondToEarlyExit = TrueSucc == ExitBlock
? CondOfEarlyExitingVPBB
: Builder.createNot(CondOfEarlyExitingVPBB);
+ assert(DominatesLatch(CondOfEarlyExitingVPBB) &&
+ "exit condition must dominate the latch");
Exits.push_back({
EarlyExitingVPBB,
ExitBlock,
@@ -4026,29 +4038,17 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
}
}
+ assert(!Exits.empty() && "must have at least one early exit");
// Sort exits by dominance to get the correct program order.
- VPDominatorTree VPDT(Plan);
llvm::sort(Exits, [&VPDT](const EarlyExitInfo &A, const EarlyExitInfo &B) {
return VPDT.dominates(A.EarlyExitingVPBB, B.EarlyExitingVPBB);
});
- // Helper to check if a VPValue's definition dominates the latch.
- // Live-in values (with no defining recipe) dominate everything.
- auto DominatesLatch = [&VPDT, LatchVPBB](VPValue *V) {
- VPRecipeBase *DefRecipe = V->getDefiningRecipe();
- if (!DefRecipe)
- return true;
- return VPDT.dominates(DefRecipe->getParent(), LatchVPBB);
- };
-
// Build the AnyOf condition for the latch terminator.
VPValue *Combined = Exits[0].CondToExit;
- assert(DominatesLatch(Combined) && "All conditions must dominate the latch");
- for (const auto &[_, _1, CondToExit] : drop_begin(Exits)) {
- assert(DominatesLatch(CondToExit) &&
- "All conditions must dominate the latch");
+ for (const auto &[_, _1, CondToExit] : drop_begin(Exits))
Combined = Builder.createOr(Combined, CondToExit);
- }
+
VPValue *IsAnyExitTaken =
Builder.createNaryOp(VPInstruction::AnyOf, {Combined});
@@ -4154,7 +4154,6 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
}
}
- if (Exits.size() != 1) {
// Chain through exits: for each exit, check if its condition is true at
// the first active lane. If so, take that exit; otherwise, try the next.
// The last exit needs no check since it must be taken if all others fail.
@@ -4185,8 +4184,6 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
IsLastDispatch ? VectorEarlyExitVPBBs.back()
: Plan.createVPBasicBlock(
Twine("vector.early.exit.check.") + Twine(I));
- if (!IsLastDispatch)
- FalseBB->setParent(LatchVPBB->getParent());
DispatchBuilder.createNaryOp(VPInstruction::BranchOnCond, {LaneVal});
CurrentBB->setSuccessors({VectorEarlyExitVPBBs[I], FalseBB});
@@ -4196,7 +4193,6 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
CurrentBB = FalseBB;
DispatchBuilder.setInsertPoint(CurrentBB);
}
- }
// Replace the latch terminator with the new branching logic.
auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
>From c02c176c4748a9ea170f51202dede725dab6bf55 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 9 Feb 2026 15:18:17 +0000
Subject: [PATCH 07/11] !fixup fix formatting
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 78 +++++++++----------
1 file changed, 39 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b2fa81846bae6..954a473c82dcc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4154,45 +4154,45 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
}
}
- // Chain through exits: for each exit, check if its condition is true at
- // the first active lane. If so, take that exit; otherwise, try the next.
- // The last exit needs no check since it must be taken if all others fail.
- //
- // For 3 exits (cond.0, cond.1, cond.2), this creates:
- //
- // vector.early.exit.check:
- // EMIT vp<%combined> = or vp<%cond.0>, vp<%cond.1>, vp<%cond.2>
- // EMIT vp<%first.lane> = first-active-lane vp<%combined>
- // EMIT vp<%at.cond.0> = extract-lane vp<%first.lane>, vp<%cond.0>
- // EMIT branch-on-cond vp<%at.cond.0>
- // Successor(s): vector.early.exit.0, vector.early.exit.check.0
- //
- // vector.early.exit.check.0:
- // EMIT vp<%at.cond.1> = extract-lane vp<%first.lane>, vp<%cond.1>
- // EMIT branch-on-cond vp<%at.cond.1>
- // Successor(s): vector.early.exit.1, vector.early.exit.2
- VPBasicBlock *CurrentBB = DispatchVPBB;
- for (auto [I, Exit] : enumerate(ArrayRef(Exits).drop_back())) {
- VPValue *LaneVal = DispatchBuilder.createNaryOp(
- VPInstruction::ExtractLane, {FirstActiveLane, Exit.CondToExit},
- DebugLoc::getUnknown(), "exit.cond.at.lane");
-
- // For the last dispatch, branch directly to the last exit on false;
- // otherwise, create a new check block.
- bool IsLastDispatch = (I + 2 == Exits.size());
- VPBasicBlock *FalseBB =
- IsLastDispatch ? VectorEarlyExitVPBBs.back()
- : Plan.createVPBasicBlock(
- Twine("vector.early.exit.check.") + Twine(I));
-
- DispatchBuilder.createNaryOp(VPInstruction::BranchOnCond, {LaneVal});
- CurrentBB->setSuccessors({VectorEarlyExitVPBBs[I], FalseBB});
- VectorEarlyExitVPBBs[I]->setPredecessors({CurrentBB});
- FalseBB->setPredecessors({CurrentBB});
-
- CurrentBB = FalseBB;
- DispatchBuilder.setInsertPoint(CurrentBB);
- }
+ // Chain through exits: for each exit, check if its condition is true at
+ // the first active lane. If so, take that exit; otherwise, try the next.
+ // The last exit needs no check since it must be taken if all others fail.
+ //
+ // For 3 exits (cond.0, cond.1, cond.2), this creates:
+ //
+ // vector.early.exit.check:
+ // EMIT vp<%combined> = or vp<%cond.0>, vp<%cond.1>, vp<%cond.2>
+ // EMIT vp<%first.lane> = first-active-lane vp<%combined>
+ // EMIT vp<%at.cond.0> = extract-lane vp<%first.lane>, vp<%cond.0>
+ // EMIT branch-on-cond vp<%at.cond.0>
+ // Successor(s): vector.early.exit.0, vector.early.exit.check.0
+ //
+ // vector.early.exit.check.0:
+ // EMIT vp<%at.cond.1> = extract-lane vp<%first.lane>, vp<%cond.1>
+ // EMIT branch-on-cond vp<%at.cond.1>
+ // Successor(s): vector.early.exit.1, vector.early.exit.2
+ VPBasicBlock *CurrentBB = DispatchVPBB;
+ for (auto [I, Exit] : enumerate(ArrayRef(Exits).drop_back())) {
+ VPValue *LaneVal = DispatchBuilder.createNaryOp(
+ VPInstruction::ExtractLane, {FirstActiveLane, Exit.CondToExit},
+ DebugLoc::getUnknown(), "exit.cond.at.lane");
+
+ // For the last dispatch, branch directly to the last exit on false;
+ // otherwise, create a new check block.
+ bool IsLastDispatch = (I + 2 == Exits.size());
+ VPBasicBlock *FalseBB =
+ IsLastDispatch ? VectorEarlyExitVPBBs.back()
+ : Plan.createVPBasicBlock(
+ Twine("vector.early.exit.check.") + Twine(I));
+
+ DispatchBuilder.createNaryOp(VPInstruction::BranchOnCond, {LaneVal});
+ CurrentBB->setSuccessors({VectorEarlyExitVPBBs[I], FalseBB});
+ VectorEarlyExitVPBBs[I]->setPredecessors({CurrentBB});
+ FalseBB->setPredecessors({CurrentBB});
+
+ CurrentBB = FalseBB;
+ DispatchBuilder.setInsertPoint(CurrentBB);
+ }
// Replace the latch terminator with the new branching logic.
auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
>From a2393c65e918f3763e306397fe8858564cef2ed7 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 10 Feb 2026 13:16:50 +0000
Subject: [PATCH 08/11] !fixup use logical or
---
.../Vectorize/LoopVectorizationPlanner.h | 6 +++
llvm/lib/Transforms/Vectorize/VPlan.h | 1 +
.../Transforms/Vectorize/VPlanAnalysis.cpp | 3 +-
.../Transforms/Vectorize/VPlanPatternMatch.h | 8 +++-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 10 +++++
.../Transforms/Vectorize/VPlanTransforms.cpp | 6 ++-
.../LoopVectorize/multi_early_exit.ll | 2 +-
.../LoopVectorize/multiple-early-exits.ll | 42 +++++++++----------
.../uncountable-early-exit-vplan.ll | 2 +-
9 files changed, 52 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 0b8796f646ae3..54bb073eb4f81 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -233,6 +233,12 @@ class VPBuilder {
return createNaryOp(VPInstruction::LogicalAnd, {LHS, RHS}, DL, Name);
}
+ VPInstruction *createLogicalOr(VPValue *LHS, VPValue *RHS,
+ DebugLoc DL = DebugLoc::getUnknown(),
+ const Twine &Name = "") {
+ return createNaryOp(VPInstruction::LogicalOr, {LHS, RHS}, DL, Name);
+ }
+
VPInstruction *createSelect(VPValue *Cond, VPValue *TrueVal,
VPValue *FalseVal,
DebugLoc DL = DebugLoc::getUnknown(),
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 20fcbfb04eea5..68b94555deeb3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1211,6 +1211,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
// during unrolling.
ExtractPenultimateElement,
LogicalAnd, // Non-poison propagating logical And.
+ LogicalOr, // Non-poison propagating logical Or.
// Add an offset in bytes (second operand) to a base pointer (first
// operand). Only generates scalar values (either for the first lane only or
// for all lanes, depending on its uses).
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 4f97f8000c187..cc77b94a9613c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -124,9 +124,10 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::LastActiveLane:
return Type::getIntNTy(Ctx, 64);
case VPInstruction::LogicalAnd:
+ case VPInstruction::LogicalOr:
assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) &&
inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
- "LogicalAnd operands should be bool");
+ "LogicalAnd/Or operands should be bool");
return IntegerType::get(Ctx, 1);
case VPInstruction::BranchOnCond:
case VPInstruction::BranchOnTwoConds:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index c0b736de1bc51..9e1574528e059 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -810,9 +810,13 @@ m_LogicalAnd(const Op0_t &Op0, const Op1_t &Op1) {
}
template <typename Op0_t, typename Op1_t>
-inline AllRecipe_match<Instruction::Select, Op0_t, specific_intval<1>, Op1_t>
+inline match_combine_or<
+ VPInstruction_match<VPInstruction::LogicalOr, Op0_t, Op1_t>,
+ AllRecipe_match<Instruction::Select, Op0_t, specific_intval<1>, Op1_t>>
m_LogicalOr(const Op0_t &Op0, const Op1_t &Op1) {
- return m_Select(Op0, m_True(), Op1);
+ return m_CombineOr(
+ m_VPInstruction<VPInstruction::LogicalOr, Op0_t, Op1_t>(Op0, Op1),
+ m_Select(Op0, m_True(), Op1));
}
template <typename Op0_t, typename Op1_t, typename Op2_t>
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2f2cf93ea3f7f..5dde98a5f0920 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -468,6 +468,7 @@ unsigned VPInstruction::getNumOperandsForOpcode() const {
case VPInstruction::ExitingIVValue:
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::LogicalAnd:
+ case VPInstruction::LogicalOr:
case VPInstruction::PtrAdd:
case VPInstruction::WidePtrAdd:
case VPInstruction::WideIVStep:
@@ -813,6 +814,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *B = State.get(getOperand(1));
return Builder.CreateLogicalAnd(A, B, Name);
}
+ case VPInstruction::LogicalOr: {
+ Value *A = State.get(getOperand(0));
+ Value *B = State.get(getOperand(1));
+ return Builder.CreateLogicalOr(A, B, Name);
+ }
case VPInstruction::PtrAdd: {
assert((State.VF.isScalar() || vputils::onlyFirstLaneUsed(this)) &&
"can only generate first lane for PtrAdd");
@@ -1338,6 +1344,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::ExtractLastActive:
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::LogicalAnd:
+ case VPInstruction::LogicalOr:
case VPInstruction::Not:
case VPInstruction::PtrAdd:
case VPInstruction::WideIVStep:
@@ -1505,6 +1512,9 @@ void VPInstruction::printRecipe(raw_ostream &O, const Twine &Indent,
case VPInstruction::LogicalAnd:
O << "logical-and";
break;
+ case VPInstruction::LogicalOr:
+ O << "logical-or";
+ break;
case VPInstruction::PtrAdd:
O << "ptradd";
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 2463ea0992db9..4d674b975cb0d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4048,10 +4048,12 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
return VPDT.dominates(A.EarlyExitingVPBB, B.EarlyExitingVPBB);
});
- // Build the AnyOf condition for the latch terminator.
+ // Build the AnyOf condition for the latch terminator using logical OR
+ // to avoid poison propagation from later exit conditions when an earlier
+ // exit is taken.
VPValue *Combined = Exits[0].CondToExit;
for (const auto &[_, _1, CondToExit] : drop_begin(Exits))
- Combined = Builder.createOr(Combined, CondToExit);
+ Combined = Builder.createLogicalOr(Combined, CondToExit);
VPValue *IsAnyExitTaken =
Builder.createNaryOp(VPInstruction::AnyOf, {Combined});
diff --git a/llvm/test/Transforms/LoopVectorize/multi_early_exit.ll b/llvm/test/Transforms/LoopVectorize/multi_early_exit.ll
index bdcda6f28a765..239fc03d79798 100644
--- a/llvm/test/Transforms/LoopVectorize/multi_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/multi_early_exit.ll
@@ -141,7 +141,7 @@ define i64 @early_exit_with_live_in_condition(i1 %cond) {
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[INC]] = add nuw i64 [[IV]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i1> [[BROADCAST_SPLAT]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> splat (i1 true), <4 x i1> [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], 1024
diff --git a/llvm/test/Transforms/LoopVectorize/multiple-early-exits.ll b/llvm/test/Transforms/LoopVectorize/multiple-early-exits.ll
index 0e11b174eca69..4cabc711f5b7d 100644
--- a/llvm/test/Transforms/LoopVectorize/multiple-early-exits.ll
+++ b/llvm/test/Transforms/LoopVectorize/multiple-early-exits.ll
@@ -24,7 +24,7 @@ define i64 @two_early_exits_same_exit() {
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[INDEX_NEXT1]] = add nuw i64 [[IV]], 4
-; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i1> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> splat (i1 true), <4 x i1> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT1]], 64
@@ -112,7 +112,7 @@ define i64 @two_early_exits_different_exits() {
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i8> [[WIDE_LOAD]], splat (i8 34)
; CHECK-NEXT: [[INDEX_NEXT1]] = add nuw i64 [[IV]], 4
-; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i1> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> splat (i1 true), <4 x i1> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT1]], 64
@@ -281,8 +281,8 @@ define i64 @three_early_exits_same_exit() {
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt <4 x i8> [[WIDE_LOAD1]], splat (i8 100)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP1]], [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> splat (i1 true), <4 x i1> [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]]
; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
@@ -370,9 +370,9 @@ define i64 @four_early_exits_same_exit() {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD1]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt <4 x i8> [[WIDE_LOAD2]], splat (i8 100)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP7:%.*]] = or <4 x i1> [[TMP1]], [[TMP3]]
-; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i1> [[TMP7]], [[TMP5]]
-; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i1> [[TMP8]], [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> splat (i1 true), <4 x i1> [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> splat (i1 true), <4 x i1> [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i1> splat (i1 true), <4 x i1> [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP9]]
; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
@@ -468,7 +468,7 @@ define i64 @two_early_exits_with_live_out_values() {
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <4 x i8> [[TMP3]], splat (i8 34)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> splat (i1 true), <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
@@ -551,7 +551,7 @@ define i64 @two_early_exits_negated_condition() {
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i8> [[WIDE_LOAD]], splat (i8 34)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i1> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> splat (i1 true), <4 x i1> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 124
@@ -643,8 +643,8 @@ define i64 @three_early_exits_three_exit_blocks() {
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i8> [[WIDE_LOAD]], splat (i8 34)
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt <4 x i8> [[WIDE_LOAD1]], splat (i8 100)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> splat (i1 true), <4 x i1> [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
@@ -810,7 +810,7 @@ define i64 @three_early_exits_iv_and_load_live_out() {
; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i1> [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt <4 x i8> [[WIDE_LOAD1]], splat (i8 100)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> splat (i1 true), <4 x i1> [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
@@ -972,7 +972,7 @@ define i64 @two_early_exits_iv_diff_incoming() {
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i8> [[WIDE_LOAD]], splat (i8 34)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i1> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> splat (i1 true), <4 x i1> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
@@ -1049,8 +1049,8 @@ define { i64, i64 } @three_early_exits_multiple_live_outs() {
; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt <4 x i8> [[TMP5]], splat (i8 100)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP7:%.*]] = or <4 x i1> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i1> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> splat (i1 true), <4 x i1> [[TMP4]]
+; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> splat (i1 true), <4 x i1> [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = freeze <4 x i1> [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP9]])
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
@@ -1146,7 +1146,7 @@ define i64 @two_early_exits_load_in_early_exit_block() {
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
-; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i1> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> splat (i1 true), <4 x i1> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
@@ -1223,8 +1223,8 @@ define i64 @three_early_exits_loads_in_different_blocks() {
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD1]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
-; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP1]], [[TMP3]]
-; CHECK-NEXT: [[TMP7:%.*]] = or <4 x i1> [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> splat (i1 true), <4 x i1> [[TMP3]]
+; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i1> splat (i1 true), <4 x i1> [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]]
; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
@@ -1320,9 +1320,9 @@ define { i64, i8 } @four_early_exits_with_conditional_loads() {
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD3]], splat (i8 40)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
-; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i1> [[TMP1]], [[TMP3]]
-; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i1> [[TMP8]], [[TMP5]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP9]], [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> splat (i1 true), <4 x i1> [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i1> splat (i1 true), <4 x i1> [[TMP5]]
+; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> splat (i1 true), <4 x i1> [[TMP7]]
; CHECK-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP10]]
; CHECK-NEXT: [[CMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
diff --git a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll
index a1b743aa8f793..218919e36b106 100644
--- a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll
+++ b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll
@@ -270,7 +270,7 @@ define i64 @two_early_exits_same_exit_with_constant_live_outs() {
; CHECK-NEXT: WIDEN ir<%ld.B> = load vp<[[PTRB]]>
; CHECK-NEXT: WIDEN ir<%cmp2> = icmp eq ir<%ld.A>, ir<%ld.B>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
-; CHECK-NEXT: EMIT vp<[[OR:%.+]]> = or ir<%cmp1>, ir<%cmp2>
+; CHECK-NEXT: EMIT vp<[[OR:%.+]]> = logical-or ir<%cmp1>, ir<%cmp2>
; CHECK-NEXT: EMIT vp<[[ANY_OF:%.+]]> = any-of vp<[[OR]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<%index.next>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-two-conds vp<[[ANY_OF]]>, vp<[[CMP]]>
>From 1931cae067bb406c619ee7405455a0c63c183325 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 11 Feb 2026 17:11:00 +0000
Subject: [PATCH 09/11] !fixup address comments, thanks
---
.../Transforms/Vectorize/VPlanPatternMatch.h | 5 +---
.../Transforms/Vectorize/VPlanTransforms.cpp | 23 ++++++++-----------
2 files changed, 10 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 9e1574528e059..2a1de8993b1c5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -810,10 +810,7 @@ m_LogicalAnd(const Op0_t &Op0, const Op1_t &Op1) {
}
template <typename Op0_t, typename Op1_t>
-inline match_combine_or<
- VPInstruction_match<VPInstruction::LogicalOr, Op0_t, Op1_t>,
- AllRecipe_match<Instruction::Select, Op0_t, specific_intval<1>, Op1_t>>
-m_LogicalOr(const Op0_t &Op0, const Op1_t &Op1) {
+auto m_LogicalOr(const Op0_t &Op0, const Op1_t &Op1) {
return m_CombineOr(
m_VPInstruction<VPInstruction::LogicalOr, Op0_t, Op1_t>(Op0, Op1),
m_Select(Op0, m_True(), Op1));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3e17345671293..f42870bc0d7fd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4005,16 +4005,7 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
VPValue *CondToExit;
};
- // Helper to check if a VPValue's definition dominates the latch.
- // Live-in values (with no defining recipe) dominate everything.
VPDominatorTree VPDT(Plan);
- [[maybe_unused]] auto DominatesLatch = [&VPDT, LatchVPBB](VPValue *V) {
- VPRecipeBase *DefRecipe = V->getDefiningRecipe();
- if (!DefRecipe)
- return true;
- return VPDT.properlyDominates(DefRecipe->getParent(), LatchVPBB);
- };
-
VPBuilder Builder(LatchVPBB->getTerminator());
SmallVector<EarlyExitInfo> Exits;
for (VPIRBasicBlock *ExitBlock : Plan.getExitBlocks()) {
@@ -4032,7 +4023,10 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
auto *CondToEarlyExit = TrueSucc == ExitBlock
? CondOfEarlyExitingVPBB
: Builder.createNot(CondOfEarlyExitingVPBB);
- assert(DominatesLatch(CondOfEarlyExitingVPBB) &&
+ assert((isa<VPIRValue>(CondOfEarlyExitingVPBB) ||
+ VPDT.properlyDominates(
+ CondOfEarlyExitingVPBB->getDefiningRecipe()->getParent(),
+ LatchVPBB)) &&
"exit condition must dominate the latch");
Exits.push_back({
EarlyExitingVPBB,
@@ -4052,8 +4046,8 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
// to avoid poison propagation from later exit conditions when an earlier
// exit is taken.
VPValue *Combined = Exits[0].CondToExit;
- for (const auto &[_, _1, CondToExit] : drop_begin(Exits))
- Combined = Builder.createLogicalOr(Combined, CondToExit);
+ for (const EarlyExitInfo &Info : drop_begin(Exits))
+ Combined = Builder.createLogicalOr(Combined, Info.CondToExit);
VPValue *IsAnyExitTaken =
Builder.createNaryOp(VPInstruction::AnyOf, {Combined});
@@ -4106,8 +4100,9 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
// IR %phi = phi ... (extra operand: vp<%exit.val> from
// vector.early.exit.I)
//
- for (auto [Exit, VectorEarlyExitVPBB] : zip(Exits, VectorEarlyExitVPBBs)) {
- auto &[EarlyExitingVPBB, EarlyExitVPBB, CondToExit] = Exit;
+ for (auto [Exit, VectorEarlyExitVPBB] :
+ zip_equal(Exits, VectorEarlyExitVPBBs)) {
+ auto &[EarlyExitingVPBB, EarlyExitVPBB, _] = Exit;
// Adjust the phi nodes in EarlyExitVPBB.
// 1. remove incoming values from EarlyExitingVPBB,
// 2. extract the incoming value at FirstActiveLane
>From ce877069eac41ada6557c507fa7aa3cf3442552a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 12 Feb 2026 18:33:19 +0000
Subject: [PATCH 10/11] !fixup address latest comments, thanks
---
llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 3 ++-
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 6 +++++-
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index b1bf0c1f2dfc6..348bece8a9da6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -835,7 +835,8 @@ inline auto m_c_LogicalAnd(const Op0_t &Op0, const Op1_t &Op1) {
template <typename Op0_t, typename Op1_t>
inline auto
m_LogicalOr(const Op0_t &Op0, const Op1_t &Op1) {
- m_VPInstruction<VPInstruction::LogicalOr, Op0_t, Op1_t>(Op0, Op1),
+ return m_CombineOr(
+ m_c_VPInstruction<VPInstruction::LogicalOr, Op0_t, Op1_t>(Op0, Op1),
m_Select(Op0, m_True(), Op1));
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 99d2a58b49481..0a87e4048a19f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4161,8 +4161,12 @@ void VPlanTransforms::handleUncountableEarlyExits(VPlan &Plan,
//
// For 3 exits (cond.0, cond.1, cond.2), this creates:
//
+ // latch:
+ // ...
+ // EMIT vp<%combined> = logical-or vp<%cond.0>, vp<%cond.1>, vp<%cond.2>
+ // ...
+ //
// vector.early.exit.check:
- // EMIT vp<%combined> = or vp<%cond.0>, vp<%cond.1>, vp<%cond.2>
// EMIT vp<%first.lane> = first-active-lane vp<%combined>
// EMIT vp<%at.cond.0> = extract-lane vp<%first.lane>, vp<%cond.0>
// EMIT branch-on-cond vp<%at.cond.0>
>From 319720a16d866f332b0274d1ba1e0a4c86bf952d Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 12 Feb 2026 18:54:10 +0000
Subject: [PATCH 11/11] !fixup fix formatting
---
llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 348bece8a9da6..5154e0e607eb4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -833,8 +833,7 @@ inline auto m_c_LogicalAnd(const Op0_t &Op0, const Op1_t &Op1) {
}
template <typename Op0_t, typename Op1_t>
-inline auto
-m_LogicalOr(const Op0_t &Op0, const Op1_t &Op1) {
+inline auto m_LogicalOr(const Op0_t &Op0, const Op1_t &Op1) {
return m_CombineOr(
m_c_VPInstruction<VPInstruction::LogicalOr, Op0_t, Op1_t>(Op0, Op1),
m_Select(Op0, m_True(), Op1));
More information about the llvm-commits mailing list