[llvm] [VPlan] Optimize resume values of IVs together with other exit values. (PR #174239)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 6 07:11:25 PST 2026
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/174239
>From 6dd495288072f5c8db64ad61fcabd5e8413b8516 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 19 Feb 2026 13:11:46 +0000
Subject: [PATCH 1/4] [VPlan] Create extracts early
---
.../Transforms/Vectorize/LoopVectorize.cpp | 15 +-
llvm/lib/Transforms/Vectorize/VPlan.h | 5 +
.../Vectorize/VPlanConstruction.cpp | 35 +++-
.../Transforms/Vectorize/VPlanTransforms.cpp | 176 ++++++++----------
.../Transforms/Vectorize/VPlanTransforms.h | 13 +-
.../LoopVectorize/AArch64/induction-costs.ll | 4 +-
.../AArch64/loop-vectorization-factors.ll | 2 +-
.../tail-folding-fixed-order-recurrence.ll | 8 +-
.../LoopVectorize/VPlan/vplan-printing.ll | 2 +-
.../first-order-recurrence-chains.ll | 118 ++++++------
.../LoopVectorize/first-order-recurrence.ll | 8 +-
.../scalable-first-order-recurrence.ll | 24 +--
.../Transforms/Vectorize/VPlanHCFGTest.cpp | 12 +-
13 files changed, 210 insertions(+), 212 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index bdbcada822e4a..b98758258c330 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8342,9 +8342,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// TODO: We can't call runPass on these transforms yet, due to verifier
// failures.
VPlanTransforms::addExitUsersForFirstOrderRecurrences(*Plan, Range);
- DenseMap<VPValue *, VPValue *> IVEndValues;
- VPlanTransforms::updateScalarResumePhis(*Plan, IVEndValues,
- CM.foldTailByMasking());
// ---------------------------------------------------------------------------
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -8356,6 +8353,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Optimize FindIV reductions to use sentinel-based approach when possible.
RUN_VPLAN_PASS(VPlanTransforms::optimizeFindIVReductions, *Plan, PSE,
*OrigLoop);
+ VPlanTransforms::optimizeInductionExitUsers(*Plan, PSE,
+ CM.foldTailByMasking());
// Apply mandatory transformation to handle reductions with multiple in-loop
// uses if possible, bail out otherwise.
@@ -8420,7 +8419,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow,
WithoutRuntimeCheck);
}
- VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues, PSE);
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
@@ -8456,13 +8454,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
if (!VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(*Plan, *TLI))
return nullptr;
- // TODO: IVEndValues are not used yet in the native path, to optimize exit
- // values.
- // TODO: We can't call runPass on the transform yet, due to verifier
- // failures.
- DenseMap<VPValue *, VPValue *> IVEndValues;
- VPlanTransforms::updateScalarResumePhis(*Plan, IVEndValues,
- /*FoldTail=*/false);
+ // Optimize induction exit users to use precomputed end values.
+ VPlanTransforms::optimizeInductionExitUsers(*Plan, PSE, /*FoldTail=*/false);
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 97e9c64d6481d..d07b469960123 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1689,6 +1689,11 @@ struct LLVM_ABI_FOR_TEST VPIRPhi : public VPIRInstruction,
return R && isa<PHINode>(R->getInstruction());
}
+ static inline bool classof(const VPUser *U) {
+ auto *R = dyn_cast<VPRecipeBase>(U);
+ return R && classof(R);
+ }
+
PHINode &getIRPhi() { return cast<PHINode>(getInstruction()); }
void execute(VPTransformState &State) override;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index ab79822d7fb67..da65d4a1a5f34 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -562,12 +562,21 @@ static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
createExtractsForLiveOuts(Plan, MiddleVPBB);
+ VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
VPBuilder ScalarPHBuilder(ScalarPH);
for (const auto &[PhiR, ScalarPhiR] : zip_equal(
drop_begin(HeaderVPBB->phis()), Plan.getScalarHeader()->phis())) {
auto *VectorPhiR = cast<VPPhi>(&PhiR);
+ VPValue *ResumeFromVectorLoop = VectorPhiR->getOperand(1);
+ if (!isa<VPIRValue>(ResumeFromVectorLoop)) {
+ ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
+ VPInstruction::ExtractLastPart, ResumeFromVectorLoop);
+ ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
+ VPInstruction::ExtractLastLane, ResumeFromVectorLoop);
+ }
auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
- {VectorPhiR, VectorPhiR->getOperand(0)}, VectorPhiR->getDebugLoc());
+ {ResumeFromVectorLoop, VectorPhiR->getOperand(0)},
+ VectorPhiR->getDebugLoc());
cast<VPIRPhi>(&ScalarPhiR)->addOperand(ResumePhiR);
}
}
@@ -742,6 +751,30 @@ void VPlanTransforms::createHeaderPhiRecipes(
PhiR->replaceAllUsesWith(HeaderPhiR);
PhiR->eraseFromParent();
}
+
+ for (const auto &[HeaderPhiR, ScalarPhiR] : zip_equal(
+ drop_begin(HeaderVPBB->phis()), Plan.getScalarPreheader()->phis())) {
+ auto *ResumePhiR = cast<VPPhi>(&ScalarPhiR);
+ if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhiR)) {
+ ResumePhiR->setName("scalar.recur.init");
+ auto *ExtractLastLane = cast<VPInstruction>(ResumePhiR->getOperand(0));
+ ExtractLastLane->setName("vector.recur.extract");
+ if (ExtractLastLane->getOpcode() == VPInstruction::ExitingIVValue) {
+ // The FOR's backedge value is shared with an induction and the extract
+ // chain was replaced. Create a fresh one from the backedge value.
+ VPBuilder MiddleBuilder(ExtractLastLane);
+ VPValue *Resume = MiddleBuilder.createNaryOp(
+ VPInstruction::ExtractLastPart, FOR->getBackedgeValue());
+ Resume =
+ MiddleBuilder.createNaryOp(VPInstruction::ExtractLastLane, Resume);
+ ResumePhiR->setOperand(0, Resume);
+ }
+ continue;
+ }
+ ResumePhiR->setName(isa<VPWidenInductionRecipe>(HeaderPhiR)
+ ? "bc.resume.val"
+ : "bc.merge.rdx");
+ }
}
void VPlanTransforms::createInLoopReductionRecipes(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c6aabe7f1ec0f..ed02a440c5806 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -980,6 +980,40 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
return EndValue;
}
+/// Compute the end value for \p WideIV, unless it is truncated. Creates a
+/// VPDerivedIVRecipe for non-canonical inductions.
+static VPValue *tryToComputeEndValueForInduction(VPWidenInductionRecipe *WideIV,
+ VPBuilder &VectorPHBuilder,
+ VPTypeAnalysis &TypeInfo,
+ VPValue *VectorTC) {
+ auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
+ // Truncated wide inductions resume from the last lane of their vector value
+ // in the last vector iteration which is handled elsewhere.
+ if (WideIntOrFp && WideIntOrFp->getTruncInst())
+ return nullptr;
+
+ VPIRValue *Start = WideIV->getStartValue();
+ VPValue *Step = WideIV->getStepValue();
+ const InductionDescriptor &ID = WideIV->getInductionDescriptor();
+ VPValue *EndValue = VectorTC;
+ if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
+ EndValue = VectorPHBuilder.createDerivedIV(
+ ID.getKind(), dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()),
+ Start, VectorTC, Step);
+ }
+
+ // EndValue is derived from the vector trip count (which has the same type as
+ // the widest induction) and thus may be wider than the induction here.
+ Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType(WideIV);
+ if (ScalarTypeOfWideIV != TypeInfo.inferScalarType(EndValue)) {
+ EndValue = VectorPHBuilder.createScalarCast(Instruction::Trunc, EndValue,
+ ScalarTypeOfWideIV,
+ WideIV->getDebugLoc());
+ }
+
+ return EndValue;
+}
+
/// Attempts to optimize the induction variable exit values for users in the
/// exit block coming from the latch in the original scalar loop.
static VPValue *optimizeLatchExitInductionUser(
@@ -987,12 +1021,6 @@ static VPValue *optimizeLatchExitInductionUser(
DenseMap<VPValue *, VPValue *> &EndValues, PredicatedScalarEvolution &PSE) {
VPValue *Incoming;
VPWidenInductionRecipe *WideIV = nullptr;
- if (match(Op, m_ExitingIVValue(m_VPValue(Incoming)))) {
- WideIV = getOptimizableIVOf(Incoming, PSE);
- assert(WideIV && "must have an optimizable IV");
- return EndValues.lookup(WideIV);
- }
-
if (match(Op, m_ExtractLastLaneOfLastPart(m_VPValue(Incoming))))
WideIV = getOptimizableIVOf(Incoming, PSE);
@@ -1000,7 +1028,7 @@ static VPValue *optimizeLatchExitInductionUser(
return nullptr;
VPValue *EndValue = EndValues.lookup(WideIV);
- assert(EndValue && "end value must have been pre-computed");
+ assert(EndValue && "Must have computed the end value up front");
// `getOptimizableIVOf()` always returns the pre-incremented IV, so if it
// changed it means the exit is using the incremented value, so we don't
@@ -1032,11 +1060,39 @@ static VPValue *optimizeLatchExitInductionUser(
return nullptr;
}
-void VPlanTransforms::optimizeInductionExitUsers(
- VPlan &Plan, DenseMap<VPValue *, VPValue *> &EndValues,
- PredicatedScalarEvolution &PSE) {
- VPBlockBase *MiddleVPBB = Plan.getMiddleBlock();
+void VPlanTransforms::optimizeInductionExitUsers(VPlan &Plan,
+ PredicatedScalarEvolution &PSE,
+ bool FoldTail) {
+ // Compute end values for all inductions.
VPTypeAnalysis TypeInfo(Plan);
+ VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
+ auto *VectorPH = cast<VPBasicBlock>(VectorRegion->getSinglePredecessor());
+ VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
+ DenseMap<VPValue *, VPValue *> EndValues;
+ VPValue *ResumeTC =
+ FoldTail ? Plan.getTripCount() : &Plan.getVectorTripCount();
+ for (auto &Phi : VectorRegion->getEntryBasicBlock()->phis()) {
+ auto *WideIV = dyn_cast<VPWidenInductionRecipe>(&Phi);
+ if (!WideIV)
+ continue;
+ if (VPValue *EndValue = tryToComputeEndValueForInduction(
+ WideIV, VectorPHBuilder, TypeInfo, ResumeTC))
+ EndValues[WideIV] = EndValue;
+ }
+
+ VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
+ for (VPRecipeBase &R : make_early_inc_range(*MiddleVPBB)) {
+ VPValue *Op;
+ if (!match(&R, m_ExitingIVValue(m_VPValue(Op))))
+ continue;
+ auto *WideIV = cast<VPWidenInductionRecipe>(Op);
+ if (VPValue *EndValue = EndValues.lookup(WideIV)) {
+ R.getVPSingleValue()->replaceAllUsesWith(EndValue);
+ R.eraseFromParent();
+ }
+ }
+
+ // Then, optimize exit block users.
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
for (VPRecipeBase &R : ExitVPBB->phis()) {
auto *ExitIRI = cast<VPIRPhi>(&R);
@@ -5500,101 +5556,10 @@ void VPlanTransforms::addBranchWeightToMiddleTerminator(
MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
}
-/// Compute and return the end value for \p WideIV, unless it is truncated. If
-/// the induction recipe is not canonical, creates a VPDerivedIVRecipe to
-/// compute the end value of the induction.
-static VPValue *tryToComputeEndValueForInduction(VPWidenInductionRecipe *WideIV,
- VPBuilder &VectorPHBuilder,
- VPTypeAnalysis &TypeInfo,
- VPValue *VectorTC) {
- auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
- // Truncated wide inductions resume from the last lane of their vector value
- // in the last vector iteration which is handled elsewhere.
- if (WideIntOrFp && WideIntOrFp->getTruncInst())
- return nullptr;
-
- VPIRValue *Start = WideIV->getStartValue();
- VPValue *Step = WideIV->getStepValue();
- const InductionDescriptor &ID = WideIV->getInductionDescriptor();
- VPValue *EndValue = VectorTC;
- if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
- EndValue = VectorPHBuilder.createDerivedIV(
- ID.getKind(), dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()),
- Start, VectorTC, Step);
- }
-
- // EndValue is derived from the vector trip count (which has the same type as
- // the widest induction) and thus may be wider than the induction here.
- Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType(WideIV);
- if (ScalarTypeOfWideIV != TypeInfo.inferScalarType(EndValue)) {
- EndValue = VectorPHBuilder.createScalarCast(Instruction::Trunc, EndValue,
- ScalarTypeOfWideIV,
- WideIV->getDebugLoc());
- }
-
- return EndValue;
-}
-
-void VPlanTransforms::updateScalarResumePhis(
- VPlan &Plan, DenseMap<VPValue *, VPValue *> &IVEndValues, bool FoldTail) {
- VPTypeAnalysis TypeInfo(Plan);
- auto *ScalarPH = Plan.getScalarPreheader();
- auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getPredecessors()[0]);
- VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
- VPBuilder VectorPHBuilder(
- cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
- VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
- for (VPRecipeBase &PhiR : Plan.getScalarPreheader()->phis()) {
- auto *ResumePhiR = cast<VPPhi>(&PhiR);
-
- // TODO: Extract final value from induction recipe initially, optimize to
- // pre-computed end value together in optimizeInductionExitUsers.
- auto *VectorPhiR = cast<VPHeaderPHIRecipe>(ResumePhiR->getOperand(0));
- if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
- // TODO: Check if tail is folded directly in VPlan.
- VPValue *TC = !FoldTail
- ? static_cast<VPValue *>(&Plan.getVectorTripCount())
- : Plan.getTripCount();
- if (VPValue *EndValue = tryToComputeEndValueForInduction(
- WideIVR, VectorPHBuilder, TypeInfo, TC)) {
- IVEndValues[WideIVR] = EndValue;
- ResumePhiR->setOperand(0, EndValue);
- ResumePhiR->setName("bc.resume.val");
- continue;
- }
- // TODO: Also handle truncated inductions here. Computing end-values
- // separately should be done as VPlan-to-VPlan optimization, after
- // legalizing all resume values to use the last lane from the loop.
- assert(cast<VPWidenIntOrFpInductionRecipe>(VectorPhiR)->getTruncInst() &&
- "should only skip truncated wide inductions");
- continue;
- }
-
- // The backedge value provides the value to resume coming out of a loop,
- // which for FORs is a vector whose last element needs to be extracted. The
- // start value provides the value if the loop is bypassed.
- bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
- auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
- assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
- "Cannot handle loops with uncountable early exits");
- if (IsFOR) {
- auto *ExtractPart = MiddleBuilder.createNaryOp(
- VPInstruction::ExtractLastPart, ResumeFromVectorLoop);
- ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
- VPInstruction::ExtractLastLane, ExtractPart, DebugLoc::getUnknown(),
- "vector.recur.extract");
- }
- ResumePhiR->setName(IsFOR ? "scalar.recur.init" : "bc.merge.rdx");
- ResumePhiR->setOperand(0, ResumeFromVectorLoop);
- }
-}
-
void VPlanTransforms::addExitUsersForFirstOrderRecurrences(VPlan &Plan,
VFRange &Range) {
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
- auto *ScalarPHVPBB = Plan.getScalarPreheader();
auto *MiddleVPBB = Plan.getMiddleBlock();
- VPBuilder ScalarPHBuilder(ScalarPHVPBB);
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
auto IsScalableOne = [](ElementCount VF) -> bool {
@@ -5695,7 +5660,12 @@ void VPlanTransforms::addExitUsersForFirstOrderRecurrences(VPlan &Plan,
VPValue *PenultimateElement = MiddleBuilder.createNaryOp(
VPInstruction::ExtractPenultimateElement, FOR->getBackedgeValue(), {},
"vector.recur.extract.for.phi");
- cast<VPInstruction>(&R)->replaceAllUsesWith(PenultimateElement);
+ for (VPUser *U : to_vector(cast<VPInstruction>(&R)->users())) {
+ auto *ExitPhi = dyn_cast<VPIRPhi>(U);
+ if (!ExitPhi)
+ continue;
+ ExitPhi->replaceUsesOfWith(cast<VPInstruction>(&R), PenultimateElement);
+ }
}
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 0dce486cb1c2c..b75dfe5c399a2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -373,10 +373,9 @@ struct VPlanTransforms {
/// If there's a single exit block, optimize its phi recipes that use exiting
/// IV values by feeding them precomputed end values instead, possibly taken
/// one step backwards.
- static void
- optimizeInductionExitUsers(VPlan &Plan,
- DenseMap<VPValue *, VPValue *> &EndValues,
- PredicatedScalarEvolution &PSE);
+ static void optimizeInductionExitUsers(VPlan &Plan,
+ PredicatedScalarEvolution &PSE,
+ bool FoldTail);
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
static void materializeBroadcasts(VPlan &Plan);
@@ -460,12 +459,6 @@ struct VPlanTransforms {
addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
std::optional<unsigned> VScaleForTuning);
- /// Update the resume phis in the scalar preheader after creating wide recipes
- /// for first-order recurrences, reductions and inductions. End values for
- /// inductions are added to \p IVEndValues.
- static void updateScalarResumePhis(
- VPlan &Plan, DenseMap<VPValue *, VPValue *> &IVEndValues, bool FoldTail);
-
/// Handle users in the exit block for first order reductions in the original
/// exit block. The penultimate value of recurrences is fed to their LCSSA phi
/// users in the original exit block using the VPIRInstruction wrapping to the
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
index 9be5953051e44..183cdc3bd5d80 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
@@ -113,8 +113,8 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) {
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
@@ -298,8 +298,8 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
; CHECK-NEXT: [[TMP12:%.*]] = zext <4 x i32> [[TMP9]] to <4 x i64>
; CHECK-NEXT: [[TMP13:%.*]] = zext <4 x i32> [[STEP_ADD]] to <4 x i64>
; CHECK-NEXT: [[TMP10:%.*]] = shl <4 x i64> [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP10]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP10]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP10]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
index bd33af286b05d..08c78354e4d33 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
@@ -811,8 +811,8 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP4]], i32 15
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP4]], i32 14
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP4]], i32 15
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
index ab3f47079b981..6f7aca9767bc1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
@@ -474,17 +474,17 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) {
; NO-VP: [[MIDDLE_BLOCK]]:
; NO-VP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
; NO-VP-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 4
-; NO-VP-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
+; NO-VP-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 2
; NO-VP-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD]], i32 [[TMP19]]
; NO-VP-NEXT: [[TMP20:%.*]] = call i32 @llvm.vscale.i32()
; NO-VP-NEXT: [[TMP21:%.*]] = mul nuw i32 [[TMP20]], 4
-; NO-VP-NEXT: [[TMP22:%.*]] = sub i32 [[TMP21]], 2
+; NO-VP-NEXT: [[TMP22:%.*]] = sub i32 [[TMP21]], 1
; NO-VP-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD]], i32 [[TMP22]]
; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TC]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
; NO-VP: [[SCALAR_PH]]:
; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; NO-VP-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; NO-VP-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
; NO-VP-NEXT: br label %[[FOR_BODY:.*]]
; NO-VP: [[FOR_BODY]]:
; NO-VP-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ]
@@ -498,7 +498,7 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) {
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[TC]]
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; NO-VP: [[FOR_END]]:
-; NO-VP-NEXT: [[FOR1_LCSSA:%.*]] = phi i32 [ [[FOR1]], %[[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ]
+; NO-VP-NEXT: [[FOR1_LCSSA:%.*]] = phi i32 [ [[FOR1]], %[[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; NO-VP-NEXT: ret i32 [[FOR1_LCSSA]]
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-printing.ll
index 0d923183e251a..22f8f23b26ef9 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-printing.ll
@@ -1055,9 +1055,9 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%vector.recur.extract.for.phi> = extract-penultimate-element ir<%for.1.next>
; CHECK-NEXT: EMIT vp<[[VP9:%[0-9]+]]> = extract-last-part ir<%for.1.next>
; CHECK-NEXT: EMIT vp<%vector.recur.extract> = extract-last-lane vp<[[VP9]]>
-; CHECK-NEXT: EMIT vp<%vector.recur.extract.for.phi> = extract-penultimate-element ir<%for.1.next>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<1000>, vp<[[VP2]]>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
index 2f05435bc75ba..70a629027b18a 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
@@ -25,15 +25,15 @@ define i16 @test_chained_first_order_recurrences_1(ptr %ptr, i64 %n) {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT2]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
@@ -95,17 +95,17 @@ define i16 @test_chained_first_order_recurrences_2(ptr %ptr, i64 %n) {
; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr [[TMP0]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT2]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
@@ -118,7 +118,7 @@ define i16 @test_chained_first_order_recurrences_2(ptr %ptr, i64 %n) {
; CHECK-NEXT: [[ADD:%.*]] = add i16 [[FOR_1]], [[FOR_2]]
; CHECK-NEXT: store i16 [[ADD]], ptr [[GEP_PTR]], align 2
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_2_LCSSA:%.*]] = phi i16 [ [[FOR_2]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[FOR_1_LCSSA:%.*]] = phi i16 [ [[FOR_1]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI3]], %[[MIDDLE_BLOCK]] ]
@@ -170,20 +170,20 @@ define i16 @test_chained_first_order_recurrences_3(ptr %ptr, i64 %n) {
; CHECK-NEXT: store <4 x i16> [[TMP6]], ptr [[TMP0]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI5:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x i16> [[TMP3]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT4]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT5]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT6]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
@@ -198,7 +198,7 @@ define i16 @test_chained_first_order_recurrences_3(ptr %ptr, i64 %n) {
; CHECK-NEXT: [[ADD_2:%.*]] = add i16 [[ADD_1]], [[FOR_3]]
; CHECK-NEXT: store i16 [[ADD_2]], ptr [[GEP_PTR]], align 2
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_1_LCSSA:%.*]] = phi i16 [ [[FOR_1]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[FOR_2_LCSSA:%.*]] = phi i16 [ [[FOR_2]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI5]], %[[MIDDLE_BLOCK]] ]
@@ -292,7 +292,7 @@ define void @test_first_order_recurrences_incoming_cycle_preheader(ptr %ptr, i64
; CHECK-NEXT: store <4 x i16> [[TMP3]], ptr [[TMP0]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
@@ -310,7 +310,7 @@ define void @test_first_order_recurrences_incoming_cycle_preheader(ptr %ptr, i64
; CHECK-NEXT: [[ADD:%.*]] = add i16 [[FOR_1]], 10
; CHECK-NEXT: store i16 [[ADD]], ptr [[GEP_PTR]], align 2
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -361,20 +361,20 @@ define i16 @test_chained_first_order_recurrences_3_reordered_1(ptr %ptr, i64 %n)
; CHECK-NEXT: store <4 x i16> [[TMP6]], ptr [[TMP0]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP3]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI5:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT4]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT5]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT6]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
@@ -389,7 +389,7 @@ define i16 @test_chained_first_order_recurrences_3_reordered_1(ptr %ptr, i64 %n)
; CHECK-NEXT: [[ADD_2:%.*]] = add i16 [[ADD_1]], [[FOR_3]]
; CHECK-NEXT: store i16 [[ADD_2]], ptr [[GEP_PTR]], align 2
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_3_LCSSA:%.*]] = phi i16 [ [[FOR_3]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[FOR_2_LCSSA:%.*]] = phi i16 [ [[FOR_2]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI5]], %[[MIDDLE_BLOCK]] ]
@@ -446,20 +446,20 @@ define i16 @test_chained_first_order_recurrences_3_reordered_2(ptr %ptr, i64 %n)
; CHECK-NEXT: store <4 x i16> [[TMP6]], ptr [[TMP0]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI5:%.*]] = extractelement <4 x i16> [[TMP3]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT4]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT5]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT6]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
@@ -474,7 +474,7 @@ define i16 @test_chained_first_order_recurrences_3_reordered_2(ptr %ptr, i64 %n)
; CHECK-NEXT: [[ADD_2:%.*]] = add i16 [[ADD_1]], [[FOR_3]]
; CHECK-NEXT: store i16 [[ADD_2]], ptr [[GEP_PTR]], align 2
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_2_LCSSA:%.*]] = phi i16 [ [[FOR_2]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[FOR_3_LCSSA:%.*]] = phi i16 [ [[FOR_3]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI5]], %[[MIDDLE_BLOCK]] ]
@@ -531,20 +531,20 @@ define i16 @test_chained_first_order_recurrences_3_for2_no_other_uses(ptr %ptr,
; CHECK-NEXT: store <4 x i16> [[TMP6]], ptr [[TMP0]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI5:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x i16> [[TMP3]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT4]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT5]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT6]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
@@ -559,7 +559,7 @@ define i16 @test_chained_first_order_recurrences_3_for2_no_other_uses(ptr %ptr,
; CHECK-NEXT: [[ADD_2:%.*]] = add i16 [[ADD_1]], [[FOR_3]]
; CHECK-NEXT: store i16 [[ADD_2]], ptr [[GEP_PTR]], align 2
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_1_LCSSA:%.*]] = phi i16 [ [[FOR_1]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[FOR_2_LCSSA:%.*]] = phi i16 [ [[FOR_2]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI5]], %[[MIDDLE_BLOCK]] ]
@@ -615,20 +615,20 @@ define i16 @test_chained_first_order_recurrences_3_for1_for2_no_other_uses(ptr %
; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr [[TMP0]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI5:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x i16> [[TMP3]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT4]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT5]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT6]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
@@ -642,7 +642,7 @@ define i16 @test_chained_first_order_recurrences_3_for1_for2_no_other_uses(ptr %
; CHECK-NEXT: [[ADD_1:%.*]] = add i16 [[FOR_3]], 10
; CHECK-NEXT: store i16 [[ADD_1]], ptr [[GEP_PTR]], align 2
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_1_LCSSA:%.*]] = phi i16 [ [[FOR_1]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[FOR_2_LCSSA:%.*]] = phi i16 [ [[FOR_2]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI5]], %[[MIDDLE_BLOCK]] ]
@@ -699,12 +699,12 @@ define double @test_chained_first_order_recurrence_sink_users_1(ptr %ptr, i64 %n
; CHECK-NEXT: store <4 x double> [[TMP8]], ptr [[TMP3]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[INDEX]]
-; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x double> [[TMP2]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x double> [[TMP2]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x double> [[TMP2]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP6]], [[INDEX]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
@@ -723,7 +723,7 @@ define double @test_chained_first_order_recurrence_sink_users_1(ptr %ptr, i64 %n
; CHECK-NEXT: [[FOR_1_NEXT]] = load double, ptr [[GEP_PTR]], align 8
; CHECK-NEXT: store double [[RES]], ptr [[GEP_PTR]], align 8
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_1_LCSSA1:%.*]] = phi double [ [[FOR_2_LCSSA]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[FOR_2_LCSSA1:%.*]] = phi double [ [[FOR_2]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI3]], %[[MIDDLE_BLOCK]] ]
@@ -814,10 +814,10 @@ define i64 @test_first_order_recurrences_and_induction(ptr %ptr, i64 %n) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
@@ -832,7 +832,7 @@ define i64 @test_first_order_recurrences_and_induction(ptr %ptr, i64 %n) {
; CHECK-NEXT: [[ADD_1:%.*]] = add i64 [[FOR_1]], 10
; CHECK-NEXT: store i64 [[ADD_1]], ptr [[GEP_PTR]], align 4
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_1_LCSSA:%.*]] = phi i64 [ [[FOR_1]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[FOR_1_LCSSA]]
@@ -878,10 +878,10 @@ define i64 @test_first_order_recurrences_and_induction2(ptr %ptr, i64 %n) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
@@ -896,7 +896,7 @@ define i64 @test_first_order_recurrences_and_induction2(ptr %ptr, i64 %n) {
; CHECK-NEXT: [[ADD_1:%.*]] = add i64 [[FOR_1]], 10
; CHECK-NEXT: store i64 [[ADD_1]], ptr [[GEP_PTR]], align 4
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_1_LCSSA:%.*]] = phi i64 [ [[FOR_1]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[FOR_1_LCSSA]]
@@ -942,10 +942,10 @@ define ptr @test_first_order_recurrences_and_pointer_induction1(ptr %ptr, i64 %n
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
@@ -1015,8 +1015,8 @@ define ptr @test_first_order_recurrences_and_pointer_induction2(ptr %ptr, i64 %n
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
@@ -1092,8 +1092,8 @@ define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b, i
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[TMP4]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[TMP4]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[TMP4]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[END:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 05a6eabed4438..eeb9b05c1fa18 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -886,8 +886,8 @@ define i32 @PR27246() {
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; UNROLL-NO-IC: middle.block:
-; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 2
+; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
@@ -975,8 +975,8 @@ define i32 @PR27246() {
; SINK-AFTER-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; SINK-AFTER: middle.block:
-; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 2
+; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 3
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
@@ -1270,8 +1270,8 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x, i32 %n) {
; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]]
-; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2
+; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC: scalar.ph:
@@ -1346,8 +1346,8 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x, i32 %n) {
; SINK-AFTER-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; SINK-AFTER: middle.block:
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2
+; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; SINK-AFTER: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
index df1f1ef819317..e2b484a5c8dc2 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
@@ -57,16 +57,16 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
; CHECK-VF4UF1: [[MIDDLE_BLOCK]]:
; CHECK-VF4UF1-NEXT: [[TMP25:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF1-NEXT: [[TMP26:%.*]] = mul nuw i32 [[TMP25]], 4
-; CHECK-VF4UF1-NEXT: [[TMP27:%.*]] = sub i32 [[TMP26]], 1
+; CHECK-VF4UF1-NEXT: [[TMP27:%.*]] = sub i32 [[TMP26]], 2
; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD]], i32 [[TMP27]]
; CHECK-VF4UF1-NEXT: [[TMP28:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF1-NEXT: [[TMP29:%.*]] = mul nuw i32 [[TMP28]], 4
-; CHECK-VF4UF1-NEXT: [[TMP30:%.*]] = sub i32 [[TMP29]], 2
+; CHECK-VF4UF1-NEXT: [[TMP30:%.*]] = sub i32 [[TMP29]], 1
; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD]], i32 [[TMP30]]
; CHECK-VF4UF1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-VF4UF1-NEXT: br i1 [[CMP_N]], [[FOR_EXIT:label %.*]], label %[[SCALAR_PH]]
; CHECK-VF4UF1: [[SCALAR_PH]]:
-; CHECK-VF4UF1-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], %[[FOR_PREHEADER]] ], [ [[PRE_LOAD]], %[[VECTOR_MEMCHECK]] ]
+; CHECK-VF4UF1-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], %[[FOR_PREHEADER]] ], [ [[PRE_LOAD]], %[[VECTOR_MEMCHECK]] ]
; CHECK-VF4UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-VF4UF1-NEXT: br label %[[SCALAR_BODY:.*]]
; CHECK-VF4UF1: [[SCALAR_BODY]]:
@@ -127,16 +127,16 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
; CHECK-VF4UF2: [[MIDDLE_BLOCK]]:
; CHECK-VF4UF2-NEXT: [[TMP33:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF2-NEXT: [[TMP34:%.*]] = mul nuw i32 [[TMP33]], 4
-; CHECK-VF4UF2-NEXT: [[TMP35:%.*]] = sub i32 [[TMP34]], 1
+; CHECK-VF4UF2-NEXT: [[TMP35:%.*]] = sub i32 [[TMP34]], 2
; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD3]], i32 [[TMP35]]
; CHECK-VF4UF2-NEXT: [[TMP36:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF2-NEXT: [[TMP37:%.*]] = mul nuw i32 [[TMP36]], 4
-; CHECK-VF4UF2-NEXT: [[TMP38:%.*]] = sub i32 [[TMP37]], 2
+; CHECK-VF4UF2-NEXT: [[TMP38:%.*]] = sub i32 [[TMP37]], 1
; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD3]], i32 [[TMP38]]
; CHECK-VF4UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-VF4UF2-NEXT: br i1 [[CMP_N]], [[FOR_EXIT:label %.*]], label %[[SCALAR_PH]]
; CHECK-VF4UF2: [[SCALAR_PH]]:
-; CHECK-VF4UF2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], %[[FOR_PREHEADER]] ], [ [[PRE_LOAD]], %[[VECTOR_MEMCHECK]] ]
+; CHECK-VF4UF2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], %[[FOR_PREHEADER]] ], [ [[PRE_LOAD]], %[[VECTOR_MEMCHECK]] ]
; CHECK-VF4UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-VF4UF2-NEXT: br label %[[SCALAR_BODY:.*]]
; CHECK-VF4UF2: [[SCALAR_BODY]]:
@@ -638,17 +638,17 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
; CHECK-VF4UF1-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-VF4UF1-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF1-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 4
-; CHECK-VF4UF1-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], 1
+; CHECK-VF4UF1-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], 2
; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <vscale x 4 x i32> [[TMP9]], i32 [[TMP13]]
; CHECK-VF4UF1-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF1-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP14]], 4
-; CHECK-VF4UF1-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 2
+; CHECK-VF4UF1-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[TMP9]], i32 [[TMP16]]
; CHECK-VF4UF1-NEXT: [[CMP_N:%.*]] = icmp eq i32 96, [[N_VEC]]
; CHECK-VF4UF1-NEXT: br i1 [[CMP_N]], [[FOR_END:label %.*]], label %[[SCALAR_PH]]
; CHECK-VF4UF1: [[SCALAR_PH]]:
; CHECK-VF4UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4UF1-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-VF4UF1-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4UF1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4UF1: [[FOR_BODY]]:
;
@@ -683,17 +683,17 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
; CHECK-VF4UF2-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[VEC_IND_NEXT]], [[BROADCAST_SPLAT]]
; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 4
-; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], 1
+; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], 2
; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <vscale x 4 x i32> [[TMP9]], i32 [[TMP13]]
; CHECK-VF4UF2-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF2-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP14]], 4
-; CHECK-VF4UF2-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 2
+; CHECK-VF4UF2-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[TMP9]], i32 [[TMP16]]
; CHECK-VF4UF2-NEXT: [[CMP_N:%.*]] = icmp eq i32 96, [[N_VEC]]
; CHECK-VF4UF2-NEXT: br i1 [[CMP_N]], [[FOR_END:label %.*]], label %[[SCALAR_PH]]
; CHECK-VF4UF2: [[SCALAR_PH]]:
; CHECK-VF4UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4UF2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-VF4UF2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4UF2-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4UF2: [[FOR_BODY]]:
;
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
index 9e62c33a06e36..1edac24740690 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
@@ -113,13 +113,13 @@ compound=true
N0 -> N2 [ label="F"]
N1 [label =
"scalar.ph:\l" +
- " EMIT-SCALAR vp\<%6\> = phi [ ir\<%indvars.iv\>, middle.block ], [ ir\<0\>, ir-bb\<entry\> ]\l" +
+ " EMIT-SCALAR vp\<%8\> = phi [ vp\<%6\>, middle.block ], [ ir\<0\>, ir-bb\<entry\> ]\l" +
"Successor(s): ir-bb\<for.body\>\l"
]
N1 -> N3 [ label=""]
N3 [label =
"ir-bb\<for.body\>:\l" +
- " IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp\<%6\> from scalar.ph)\l" +
+ " IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp\<%8\> from scalar.ph)\l" +
" IR %arr.idx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv\l" +
" IR %l1 = load i32, ptr %arr.idx, align 4\l" +
" IR %res = add i32 %l1, 10\l" +
@@ -155,6 +155,8 @@ compound=true
N4 -> N6 [ label="" ltail=cluster_N5]
N6 [label =
"middle.block:\l" +
+ " EMIT vp\<%5\> = extract-last-part ir\<%indvars.iv.next\>\l" +
+ " EMIT vp\<%6\> = extract-last-lane vp\<%5\>\l" +
" EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%1\>\l" +
" EMIT branch-on-cond vp\<%cmp.n\>\l" +
"Successor(s): ir-bb\<for.end\>, scalar.ph\l"
@@ -282,13 +284,13 @@ compound=true
N0 -> N2 [ label="F"]
N1 [label =
"scalar.ph:\l" +
- " EMIT-SCALAR vp\<%6\> = phi [ ir\<%iv\>, middle.block ], [ ir\<0\>, ir-bb\<entry\> ]\l" +
+ " EMIT-SCALAR vp\<%8\> = phi [ vp\<%6\>, middle.block ], [ ir\<0\>, ir-bb\<entry\> ]\l" +
"Successor(s): ir-bb\<loop.header\>\l"
]
N1 -> N3 [ label=""]
N3 [label =
"ir-bb\<loop.header\>:\l" +
- " IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] (extra operand: vp\<%6\> from scalar.ph)\l" +
+ " IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] (extra operand: vp\<%8\> from scalar.ph)\l" +
" IR %arr.idx = getelementptr inbounds i32, ptr %A, i64 %iv\l" +
" IR %l1 = load i32, ptr %arr.idx, align 4\l" +
" IR %c = icmp eq i32 %l1, 0\l" +
@@ -327,6 +329,8 @@ compound=true
N6 -> N7 [ label="" ltail=cluster_N5]
N7 [label =
"middle.block:\l" +
+ " EMIT vp\<%5\> = extract-last-part ir\<%iv.next\>\l" +
+ " EMIT vp\<%6\> = extract-last-lane vp\<%5\>\l" +
" EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%1\>\l" +
" EMIT branch-on-cond vp\<%cmp.n\>\l" +
"Successor(s): ir-bb\<exit.2\>, scalar.ph\l"
>From 42f44a3251a0b644321e19bfcecc01ba0b93a391 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 2 Mar 2026 12:20:11 +0000
Subject: [PATCH 2/4] !fixup update test checks
---
.../Transforms/Vectorize/LoopVectorize.cpp | 6 +-
.../Vectorize/VPlanConstruction.cpp | 2 +-
.../Transforms/Vectorize/VPlanTransforms.cpp | 2 +-
.../Transforms/Vectorize/VPlanTransforms.h | 2 +-
.../first-order-recurrence-chains.ll | 90 +++++++++----------
.../scalable-first-order-recurrence.ll | 24 ++---
6 files changed, 63 insertions(+), 63 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b98758258c330..1e3208fe3c1ed 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8353,7 +8353,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Optimize FindIV reductions to use sentinel-based approach when possible.
RUN_VPLAN_PASS(VPlanTransforms::optimizeFindIVReductions, *Plan, PSE,
*OrigLoop);
- VPlanTransforms::optimizeInductionExitUsers(*Plan, PSE,
+ VPlanTransforms::optimizeInductionLiveOutUsers(*Plan, PSE,
CM.foldTailByMasking());
// Apply mandatory transformation to handle reductions with multiple in-loop
@@ -8454,8 +8454,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
if (!VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(*Plan, *TLI))
return nullptr;
- // Optimize induction exit users to use precomputed end values.
- VPlanTransforms::optimizeInductionExitUsers(*Plan, PSE, /*FoldTail=*/false);
+ // Optimize induction live-out users to use precomputed end values.
+ VPlanTransforms::optimizeInductionLiveOutUsers(*Plan, PSE, /*FoldTail=*/false);
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index da65d4a1a5f34..7d25fa70230c2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -635,7 +635,7 @@ createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR, VPIRValue *Start,
VPValue *BackedgeVal = PhiR->getOperand(1);
// Replace live-out extracts of WideIV's backedge value by ExitingIVValue
- // recipes. optimizeInductionExitUsers will later compute the proper
+ // recipes. optimizeInductionLiveOutUsers will later compute the proper
// DerivedIV.
auto ReplaceExtractsWithExitingIVValue = [&](VPHeaderPHIRecipe *WideIV) {
for (VPUser *U : to_vector(BackedgeVal->users())) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index ed02a440c5806..70baa9f8eca22 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1060,7 +1060,7 @@ static VPValue *optimizeLatchExitInductionUser(
return nullptr;
}
-void VPlanTransforms::optimizeInductionExitUsers(VPlan &Plan,
+void VPlanTransforms::optimizeInductionLiveOutUsers(VPlan &Plan,
PredicatedScalarEvolution &PSE,
bool FoldTail) {
// Compute end values for all inductions.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index b75dfe5c399a2..1c98579cc224c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -373,7 +373,7 @@ struct VPlanTransforms {
/// If there's a single exit block, optimize its phi recipes that use exiting
/// IV values by feeding them precomputed end values instead, possibly taken
/// one step backwards.
- static void optimizeInductionExitUsers(VPlan &Plan,
+ static void optimizeInductionLiveOutUsers(VPlan &Plan,
PredicatedScalarEvolution &PSE,
bool FoldTail);
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
index 70a629027b18a..dff4c801ca83f 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
@@ -28,12 +28,12 @@ define i16 @test_chained_first_order_recurrences_1(ptr %ptr, i64 %n) {
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT2]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
@@ -95,17 +95,17 @@ define i16 @test_chained_first_order_recurrences_2(ptr %ptr, i64 %n) {
; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr [[TMP0]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT2]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
@@ -118,7 +118,7 @@ define i16 @test_chained_first_order_recurrences_2(ptr %ptr, i64 %n) {
; CHECK-NEXT: [[ADD:%.*]] = add i16 [[FOR_1]], [[FOR_2]]
; CHECK-NEXT: store i16 [[ADD]], ptr [[GEP_PTR]], align 2
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_2_LCSSA:%.*]] = phi i16 [ [[FOR_2]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[FOR_1_LCSSA:%.*]] = phi i16 [ [[FOR_1]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI3]], %[[MIDDLE_BLOCK]] ]
@@ -170,20 +170,20 @@ define i16 @test_chained_first_order_recurrences_3(ptr %ptr, i64 %n) {
; CHECK-NEXT: store <4 x i16> [[TMP6]], ptr [[TMP0]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI5:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x i16> [[TMP3]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT5]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT6]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT4]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
@@ -198,7 +198,7 @@ define i16 @test_chained_first_order_recurrences_3(ptr %ptr, i64 %n) {
; CHECK-NEXT: [[ADD_2:%.*]] = add i16 [[ADD_1]], [[FOR_3]]
; CHECK-NEXT: store i16 [[ADD_2]], ptr [[GEP_PTR]], align 2
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_1_LCSSA:%.*]] = phi i16 [ [[FOR_1]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[FOR_2_LCSSA:%.*]] = phi i16 [ [[FOR_2]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI5]], %[[MIDDLE_BLOCK]] ]
@@ -292,7 +292,7 @@ define void @test_first_order_recurrences_incoming_cycle_preheader(ptr %ptr, i64
; CHECK-NEXT: store <4 x i16> [[TMP3]], ptr [[TMP0]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
@@ -310,7 +310,7 @@ define void @test_first_order_recurrences_incoming_cycle_preheader(ptr %ptr, i64
; CHECK-NEXT: [[ADD:%.*]] = add i16 [[FOR_1]], 10
; CHECK-NEXT: store i16 [[ADD]], ptr [[GEP_PTR]], align 2
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -361,20 +361,20 @@ define i16 @test_chained_first_order_recurrences_3_reordered_1(ptr %ptr, i64 %n)
; CHECK-NEXT: store <4 x i16> [[TMP6]], ptr [[TMP0]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP3]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI5:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT5]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT6]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT4]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
@@ -389,7 +389,7 @@ define i16 @test_chained_first_order_recurrences_3_reordered_1(ptr %ptr, i64 %n)
; CHECK-NEXT: [[ADD_2:%.*]] = add i16 [[ADD_1]], [[FOR_3]]
; CHECK-NEXT: store i16 [[ADD_2]], ptr [[GEP_PTR]], align 2
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_3_LCSSA:%.*]] = phi i16 [ [[FOR_3]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[FOR_2_LCSSA:%.*]] = phi i16 [ [[FOR_2]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI5]], %[[MIDDLE_BLOCK]] ]
@@ -446,20 +446,20 @@ define i16 @test_chained_first_order_recurrences_3_reordered_2(ptr %ptr, i64 %n)
; CHECK-NEXT: store <4 x i16> [[TMP6]], ptr [[TMP0]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI5:%.*]] = extractelement <4 x i16> [[TMP3]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT5]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT6]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT4]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
@@ -474,7 +474,7 @@ define i16 @test_chained_first_order_recurrences_3_reordered_2(ptr %ptr, i64 %n)
; CHECK-NEXT: [[ADD_2:%.*]] = add i16 [[ADD_1]], [[FOR_3]]
; CHECK-NEXT: store i16 [[ADD_2]], ptr [[GEP_PTR]], align 2
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_2_LCSSA:%.*]] = phi i16 [ [[FOR_2]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[FOR_3_LCSSA:%.*]] = phi i16 [ [[FOR_3]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI5]], %[[MIDDLE_BLOCK]] ]
@@ -531,20 +531,20 @@ define i16 @test_chained_first_order_recurrences_3_for2_no_other_uses(ptr %ptr,
; CHECK-NEXT: store <4 x i16> [[TMP6]], ptr [[TMP0]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI5:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x i16> [[TMP3]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT5]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT6]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT4]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
@@ -559,7 +559,7 @@ define i16 @test_chained_first_order_recurrences_3_for2_no_other_uses(ptr %ptr,
; CHECK-NEXT: [[ADD_2:%.*]] = add i16 [[ADD_1]], [[FOR_3]]
; CHECK-NEXT: store i16 [[ADD_2]], ptr [[GEP_PTR]], align 2
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_1_LCSSA:%.*]] = phi i16 [ [[FOR_1]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[FOR_2_LCSSA:%.*]] = phi i16 [ [[FOR_2]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI5]], %[[MIDDLE_BLOCK]] ]
@@ -615,20 +615,20 @@ define i16 @test_chained_first_order_recurrences_3_for1_for2_no_other_uses(ptr %
; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr [[TMP0]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI5:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x i16> [[TMP3]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 22, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT5]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT6]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT3]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT4]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
@@ -642,7 +642,7 @@ define i16 @test_chained_first_order_recurrences_3_for1_for2_no_other_uses(ptr %
; CHECK-NEXT: [[ADD_1:%.*]] = add i16 [[FOR_3]], 10
; CHECK-NEXT: store i16 [[ADD_1]], ptr [[GEP_PTR]], align 2
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_1_LCSSA:%.*]] = phi i16 [ [[FOR_1]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[FOR_2_LCSSA:%.*]] = phi i16 [ [[FOR_2]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI5]], %[[MIDDLE_BLOCK]] ]
@@ -699,7 +699,7 @@ define double @test_chained_first_order_recurrence_sink_users_1(ptr %ptr, i64 %n
; CHECK-NEXT: store <4 x double> [[TMP8]], ptr [[TMP3]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[INDEX]]
-; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x double> [[TMP2]], i32 2
@@ -723,7 +723,7 @@ define double @test_chained_first_order_recurrence_sink_users_1(ptr %ptr, i64 %n
; CHECK-NEXT: [[FOR_1_NEXT]] = load double, ptr [[GEP_PTR]], align 8
; CHECK-NEXT: store double [[RES]], ptr [[GEP_PTR]], align 8
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_1_LCSSA1:%.*]] = phi double [ [[FOR_2_LCSSA]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[FOR_2_LCSSA1:%.*]] = phi double [ [[FOR_2]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI3]], %[[MIDDLE_BLOCK]] ]
@@ -814,7 +814,7 @@ define i64 @test_first_order_recurrences_and_induction(ptr %ptr, i64 %n) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
@@ -832,7 +832,7 @@ define i64 @test_first_order_recurrences_and_induction(ptr %ptr, i64 %n) {
; CHECK-NEXT: [[ADD_1:%.*]] = add i64 [[FOR_1]], 10
; CHECK-NEXT: store i64 [[ADD_1]], ptr [[GEP_PTR]], align 4
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_1_LCSSA:%.*]] = phi i64 [ [[FOR_1]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[FOR_1_LCSSA]]
@@ -878,7 +878,7 @@ define i64 @test_first_order_recurrences_and_induction2(ptr %ptr, i64 %n) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
@@ -896,7 +896,7 @@ define i64 @test_first_order_recurrences_and_induction2(ptr %ptr, i64 %n) {
; CHECK-NEXT: [[ADD_1:%.*]] = add i64 [[FOR_1]], 10
; CHECK-NEXT: store i64 [[ADD_1]], ptr [[GEP_PTR]], align 4
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FOR_1_LCSSA:%.*]] = phi i64 [ [[FOR_1]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[FOR_1_LCSSA]]
@@ -942,7 +942,7 @@ define ptr @test_first_order_recurrences_and_pointer_induction1(ptr %ptr, i64 %n
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 3
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
index e2b484a5c8dc2..853a19cc87747 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
@@ -58,15 +58,15 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
; CHECK-VF4UF1-NEXT: [[TMP25:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF1-NEXT: [[TMP26:%.*]] = mul nuw i32 [[TMP25]], 4
; CHECK-VF4UF1-NEXT: [[TMP27:%.*]] = sub i32 [[TMP26]], 2
-; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD]], i32 [[TMP27]]
+; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD]], i32 [[TMP27]]
; CHECK-VF4UF1-NEXT: [[TMP28:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF1-NEXT: [[TMP29:%.*]] = mul nuw i32 [[TMP28]], 4
; CHECK-VF4UF1-NEXT: [[TMP30:%.*]] = sub i32 [[TMP29]], 1
-; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD]], i32 [[TMP30]]
+; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD]], i32 [[TMP30]]
; CHECK-VF4UF1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-VF4UF1-NEXT: br i1 [[CMP_N]], [[FOR_EXIT:label %.*]], label %[[SCALAR_PH]]
; CHECK-VF4UF1: [[SCALAR_PH]]:
-; CHECK-VF4UF1-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], %[[FOR_PREHEADER]] ], [ [[PRE_LOAD]], %[[VECTOR_MEMCHECK]] ]
+; CHECK-VF4UF1-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], %[[FOR_PREHEADER]] ], [ [[PRE_LOAD]], %[[VECTOR_MEMCHECK]] ]
; CHECK-VF4UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-VF4UF1-NEXT: br label %[[SCALAR_BODY:.*]]
; CHECK-VF4UF1: [[SCALAR_BODY]]:
@@ -128,15 +128,15 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
; CHECK-VF4UF2-NEXT: [[TMP33:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF2-NEXT: [[TMP34:%.*]] = mul nuw i32 [[TMP33]], 4
; CHECK-VF4UF2-NEXT: [[TMP35:%.*]] = sub i32 [[TMP34]], 2
-; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD3]], i32 [[TMP35]]
+; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD3]], i32 [[TMP35]]
; CHECK-VF4UF2-NEXT: [[TMP36:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF2-NEXT: [[TMP37:%.*]] = mul nuw i32 [[TMP36]], 4
; CHECK-VF4UF2-NEXT: [[TMP38:%.*]] = sub i32 [[TMP37]], 1
-; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD3]], i32 [[TMP38]]
+; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD3]], i32 [[TMP38]]
; CHECK-VF4UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-VF4UF2-NEXT: br i1 [[CMP_N]], [[FOR_EXIT:label %.*]], label %[[SCALAR_PH]]
; CHECK-VF4UF2: [[SCALAR_PH]]:
-; CHECK-VF4UF2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], %[[FOR_PREHEADER]] ], [ [[PRE_LOAD]], %[[VECTOR_MEMCHECK]] ]
+; CHECK-VF4UF2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], %[[FOR_PREHEADER]] ], [ [[PRE_LOAD]], %[[VECTOR_MEMCHECK]] ]
; CHECK-VF4UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-VF4UF2-NEXT: br label %[[SCALAR_BODY:.*]]
; CHECK-VF4UF2: [[SCALAR_BODY]]:
@@ -639,16 +639,16 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
; CHECK-VF4UF1-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF1-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 4
; CHECK-VF4UF1-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], 2
-; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <vscale x 4 x i32> [[TMP9]], i32 [[TMP13]]
+; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[TMP9]], i32 [[TMP13]]
; CHECK-VF4UF1-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF1-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP14]], 4
; CHECK-VF4UF1-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
-; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[TMP9]], i32 [[TMP16]]
+; CHECK-VF4UF1-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <vscale x 4 x i32> [[TMP9]], i32 [[TMP16]]
; CHECK-VF4UF1-NEXT: [[CMP_N:%.*]] = icmp eq i32 96, [[N_VEC]]
; CHECK-VF4UF1-NEXT: br i1 [[CMP_N]], [[FOR_END:label %.*]], label %[[SCALAR_PH]]
; CHECK-VF4UF1: [[SCALAR_PH]]:
; CHECK-VF4UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4UF1-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-VF4UF1-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4UF1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4UF1: [[FOR_BODY]]:
;
@@ -684,16 +684,16 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 4
; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], 2
-; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <vscale x 4 x i32> [[TMP9]], i32 [[TMP13]]
+; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[TMP9]], i32 [[TMP13]]
; CHECK-VF4UF2-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-VF4UF2-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP14]], 4
; CHECK-VF4UF2-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
-; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[TMP9]], i32 [[TMP16]]
+; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <vscale x 4 x i32> [[TMP9]], i32 [[TMP16]]
; CHECK-VF4UF2-NEXT: [[CMP_N:%.*]] = icmp eq i32 96, [[N_VEC]]
; CHECK-VF4UF2-NEXT: br i1 [[CMP_N]], [[FOR_END:label %.*]], label %[[SCALAR_PH]]
; CHECK-VF4UF2: [[SCALAR_PH]]:
; CHECK-VF4UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4UF2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-VF4UF2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4UF2-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4UF2: [[FOR_BODY]]:
;
>From 991b8e99d6b6b6e459cab631157a3ad62588730a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 2 Mar 2026 13:14:27 +0000
Subject: [PATCH 3/4] !fixup fix formatting
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 5 +++--
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 5 ++---
llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 4 ++--
3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1e3208fe3c1ed..a603302149d9c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8354,7 +8354,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
RUN_VPLAN_PASS(VPlanTransforms::optimizeFindIVReductions, *Plan, PSE,
*OrigLoop);
VPlanTransforms::optimizeInductionLiveOutUsers(*Plan, PSE,
- CM.foldTailByMasking());
+ CM.foldTailByMasking());
// Apply mandatory transformation to handle reductions with multiple in-loop
// uses if possible, bail out otherwise.
@@ -8455,7 +8455,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
return nullptr;
// Optimize induction live-out users to use precomputed end values.
- VPlanTransforms::optimizeInductionLiveOutUsers(*Plan, PSE, /*FoldTail=*/false);
+ VPlanTransforms::optimizeInductionLiveOutUsers(*Plan, PSE,
+ /*FoldTail=*/false);
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 70baa9f8eca22..50d9111cccf03 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1060,9 +1060,8 @@ static VPValue *optimizeLatchExitInductionUser(
return nullptr;
}
-void VPlanTransforms::optimizeInductionLiveOutUsers(VPlan &Plan,
- PredicatedScalarEvolution &PSE,
- bool FoldTail) {
+void VPlanTransforms::optimizeInductionLiveOutUsers(
+ VPlan &Plan, PredicatedScalarEvolution &PSE, bool FoldTail) {
// Compute end values for all inductions.
VPTypeAnalysis TypeInfo(Plan);
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 1c98579cc224c..7ea3a2685b8f8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -374,8 +374,8 @@ struct VPlanTransforms {
/// IV values by feeding them precomputed end values instead, possibly taken
/// one step backwards.
static void optimizeInductionLiveOutUsers(VPlan &Plan,
- PredicatedScalarEvolution &PSE,
- bool FoldTail);
+ PredicatedScalarEvolution &PSE,
+ bool FoldTail);
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
static void materializeBroadcasts(VPlan &Plan);
>From bcf9f4d14305982acb80aa2e9ece96020993cebe Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 4 Mar 2026 12:26:32 +0000
Subject: [PATCH 4/4] !fixup address comments, thanks
---
.../Vectorize/VPlanConstruction.cpp | 45 ++++++++++---------
llvm/test/Transforms/LoopVectorize/optsize.ll | 6 +--
2 files changed, 25 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 7d25fa70230c2..412d1abd4946d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -511,6 +511,13 @@ static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
}
}
+/// Return an iterator range to iterate over pairs of matching phi nodes in
+/// \p Header and \p ScalarHeader, skipping the canonical IV in the former.
+static auto getMatchingPhisForScalarLoop(VPBasicBlock *Header,
+ VPBasicBlock *ScalarHeader) {
+ return zip_equal(drop_begin(Header->phis()), ScalarHeader->phis());
+}
+
static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
PredicatedScalarEvolution &PSE, Loop *TheLoop) {
VPDominatorTree VPDT(Plan);
@@ -562,18 +569,24 @@ static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
createExtractsForLiveOuts(Plan, MiddleVPBB);
+ // Create resume phis in the scalar preheader for each phi in the scalar loop.
+ // Their incoming value from the vector loop will be the last lane of the
+ // corresponding vector loop header phi.
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
VPBuilder ScalarPHBuilder(ScalarPH);
- for (const auto &[PhiR, ScalarPhiR] : zip_equal(
- drop_begin(HeaderVPBB->phis()), Plan.getScalarHeader()->phis())) {
+ assert(equal(ScalarPH->getPredecessors(),
+ ArrayRef<VPBlockBase *>({MiddleVPBB, Plan.getEntry()})) &&
+ "unexpected predecessor order of scalar ph");
+ for (const auto &[PhiR, ScalarPhiR] :
+ getMatchingPhisForScalarLoop(HeaderVPBB, Plan.getScalarHeader())) {
auto *VectorPhiR = cast<VPPhi>(&PhiR);
- VPValue *ResumeFromVectorLoop = VectorPhiR->getOperand(1);
- if (!isa<VPIRValue>(ResumeFromVectorLoop)) {
- ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
- VPInstruction::ExtractLastPart, ResumeFromVectorLoop);
- ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
- VPInstruction::ExtractLastLane, ResumeFromVectorLoop);
- }
+ VPValue *BackedgeVal = VectorPhiR->getOperand(1);
+ VPValue *ResumeFromVectorLoop =
+ MiddleBuilder.createNaryOp(VPInstruction::ExtractLastPart, BackedgeVal);
+ ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
+ VPInstruction::ExtractLastLane, ResumeFromVectorLoop);
+ // Create scalar resume phi, with the first operand being the incoming value
+ // from the middle block and the second operand coming from the entry block.
auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
{ResumeFromVectorLoop, VectorPhiR->getOperand(0)},
VectorPhiR->getDebugLoc());
@@ -752,23 +765,13 @@ void VPlanTransforms::createHeaderPhiRecipes(
PhiR->eraseFromParent();
}
- for (const auto &[HeaderPhiR, ScalarPhiR] : zip_equal(
- drop_begin(HeaderVPBB->phis()), Plan.getScalarPreheader()->phis())) {
+ for (const auto &[HeaderPhiR, ScalarPhiR] :
+ getMatchingPhisForScalarLoop(HeaderVPBB, Plan.getScalarPreheader())) {
auto *ResumePhiR = cast<VPPhi>(&ScalarPhiR);
if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhiR)) {
ResumePhiR->setName("scalar.recur.init");
auto *ExtractLastLane = cast<VPInstruction>(ResumePhiR->getOperand(0));
ExtractLastLane->setName("vector.recur.extract");
- if (ExtractLastLane->getOpcode() == VPInstruction::ExitingIVValue) {
- // The FOR's backedge value is shared with an induction and the extract
- // chain was replaced. Create a fresh one from the backedge value.
- VPBuilder MiddleBuilder(ExtractLastLane);
- VPValue *Resume = MiddleBuilder.createNaryOp(
- VPInstruction::ExtractLastPart, FOR->getBackedgeValue());
- Resume =
- MiddleBuilder.createNaryOp(VPInstruction::ExtractLastLane, Resume);
- ResumePhiR->setOperand(0, Resume);
- }
continue;
}
ResumePhiR->setName(isa<VPWidenInductionRecipe>(HeaderPhiR)
diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll
index 9651e1cd2dd04..cd0c6c03d5925 100644
--- a/llvm/test/Transforms/LoopVectorize/optsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/optsize.ll
@@ -638,20 +638,16 @@ define i32 @pr45526_pgso() !prof !14 {
; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]]
; NPGSO: [[VECTOR_BODY]]:
; NPGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; NPGSO-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; NPGSO-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4)
; NPGSO-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 508
; NPGSO-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; NPGSO: [[MIDDLE_BLOCK]]:
-; NPGSO-NEXT: [[TMP0:%.*]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 1)
-; NPGSO-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
; NPGSO-NEXT: br label %[[SCALAR_PH:.*]]
; NPGSO: [[SCALAR_PH]]:
; NPGSO-NEXT: br label %[[LOOP:.*]]
; NPGSO: [[LOOP]]:
; NPGSO-NEXT: [[PIV:%.*]] = phi i32 [ 508, %[[SCALAR_PH]] ], [ [[PIVPLUS1:%.*]], %[[LOOP]] ]
-; NPGSO-NEXT: [[FOR:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[PIVPLUS1]], %[[LOOP]] ]
+; NPGSO-NEXT: [[FOR:%.*]] = phi i32 [ 508, %[[SCALAR_PH]] ], [ [[PIVPLUS1]], %[[LOOP]] ]
; NPGSO-NEXT: [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1
; NPGSO-NEXT: [[COND:%.*]] = icmp ult i32 [[PIV]], 510
; NPGSO-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP24:![0-9]+]]
More information about the llvm-commits
mailing list