[llvm] [VPlan] Add ReductionStartVector VPInstruction. (PR #142290)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sat May 31 14:12:47 PDT 2025
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/142290
Add a new VPInstruction::ReductionStartVector opcode to create the start values for wide reductions. This more accurately models the start value creation in VPlan and simplifies VPReductionPHIRecipe::execute. Down the line it also allows removing VPReductionPHIRecipe::RdxDesc.
>From aefed54b54a6441deb29728c89d40594dc393d2a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 30 May 2025 13:24:52 +0100
Subject: [PATCH] [VPlan] Add ReductionStartVector VPInstruction.
Add a new VPInstruction::ReductionStartVector opcode to create the start
values for wide reductions. This more accurately models the start value
creation in VPlan and simplifies VPReductionPHIRecipe::execute.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 37 ++++++++-
llvm/lib/Transforms/Vectorize/VPlan.h | 11 +--
.../Transforms/Vectorize/VPlanAnalysis.cpp | 2 +
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 76 +++++++------------
.../Transforms/Vectorize/VPlanTransforms.cpp | 16 ++++
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 14 ++++
.../partial-reduce-dot-product-epilogue.ll | 2 +-
.../LoopVectorize/PowerPC/exit-branch-cost.ll | 2 +-
...rize-force-tail-with-evl-cond-reduction.ll | 6 +-
...vectorize-force-tail-with-evl-reduction.ll | 4 +-
.../LoopVectorize/X86/cost-model.ll | 2 +-
.../LoopVectorize/if-pred-stores.ll | 2 +-
.../Transforms/LoopVectorize/induction.ll | 10 +--
.../vplan-printing-reductions.ll | 18 +++--
14 files changed, 124 insertions(+), 78 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e9ace195684b3..1830ea9678e85 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7501,8 +7501,11 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
cast<VPReductionPHIRecipe>(EpiRedResult->getOperand(0));
const RecurrenceDescriptor &RdxDesc =
EpiRedHeaderPhi->getRecurrenceDescriptor();
- Value *MainResumeValue =
- EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
+ Value *MainResumeValue;
+ if (auto *VPI = dyn_cast<VPInstruction>(EpiRedHeaderPhi->getStartValue()))
+ MainResumeValue = VPI->getOperand(0)->getUnderlyingValue();
+ else
+ MainResumeValue = EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
auto *Cmp = cast<ICmpInst>(MainResumeValue);
@@ -8552,6 +8555,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
// If the PHI is used by a partial reduction, set the scale factor.
unsigned ScaleFactor =
getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
+
PhiRecipe = new VPReductionPHIRecipe(
Phi, RdxDesc, *StartV, CM.isInLoopReduction(Phi),
CM.useOrderedReductions(RdxDesc), ScaleFactor);
@@ -9439,7 +9443,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
continue;
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
- Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType();
+ Type *PhiTy = PhiR->getUnderlyingValue()->getType();
// If tail is folded by masking, introduce selects between the phi
// and the users outside the vector region of each reduction, at the
// beginning of the dedicated latch block.
@@ -9569,6 +9573,27 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// start value.
PhiR->setOperand(0, Plan->getOrAddLiveIn(RdxDesc.getSentinelValue()));
}
+ RecurKind RK = RdxDesc.getRecurrenceKind();
+ if (PhiR->isOrdered() || PhiR->isInLoop() ||
+ (!RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) &&
+ !RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
+ !RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))) {
+ VPBuilder PHBuilder(Plan->getVectorPreheader());
+ VPValue *Iden = Plan->getOrAddLiveIn(
+ getRecurrenceIdentity(RK, PhiTy, RdxDesc.getFastMathFlags()));
+ // If the PHI is used by a partial reduction, set the scale factor.
+ unsigned ScaleFactor =
+ RecipeBuilder.getScalingForReduction(RdxDesc.getLoopExitInstr())
+ .value_or(1);
+ Type *I32Ty = IntegerType::getInt32Ty(PhiTy->getContext());
+ auto *ScalarFactorVPV =
+ Plan->getOrAddLiveIn(ConstantInt::get(I32Ty, ScaleFactor));
+ VPValue *StartV =
+ PHBuilder.createNaryOp(VPInstruction::ReductionStartVector,
+ {PhiR->getStartValue(), Iden, ScalarFactorVPV},
+ RdxDesc.getFastMathFlags());
+ PhiR->setOperand(0, StartV);
+ }
}
for (VPRecipeBase *R : ToDelete)
R->eraseFromParent();
@@ -10081,6 +10106,12 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
}
assert(ResumeV && "Must have a resume value");
VPValue *StartVal = Plan.getOrAddLiveIn(ResumeV);
+ if (auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R)) {
+ if (auto *VPI = dyn_cast<VPInstruction>(PhiR->getStartValue())) {
+ VPI->setOperand(0, StartVal);
+ continue;
+ }
+ }
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 44f0b6d964a6e..c49e20518b506 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -907,6 +907,10 @@ class VPInstruction : public VPRecipeWithIRFlags,
BranchOnCount,
BranchOnCond,
Broadcast,
+ /// Start vector for reductions with 3 operands: the original start value,
+ /// the identity value for the reduction and an integer indicating the
+ /// scaling factor.
+ ReductionStartVector,
ComputeFindLastIVResult,
ComputeReductionResult,
// Extracts the last lane from its operand if it is a vector, or the last
@@ -2225,13 +2229,6 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// Returns true, if the phi is part of an in-loop reduction.
bool isInLoop() const { return IsInLoop; }
-
- /// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- return Op == getStartValue();
- }
};
/// A recipe for vectorizing a phi-node as a sequence of mask-based select
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 926490bfad7d0..7cfb07ab25e48 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -87,6 +87,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
inferScalarType(R->getOperand(1)) &&
"different types inferred for different operands");
return IntegerType::get(Ctx, 1);
+ case VPInstruction::ReductionStartVector:
+ return inferScalarType(R->getOperand(0));
case VPInstruction::ComputeFindLastIVResult:
case VPInstruction::ComputeReductionResult: {
auto *PhiR = cast<VPReductionPHIRecipe>(R->getOperand(0));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index a4831ea7c11f7..0cdd3216288ea 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -604,6 +604,20 @@ Value *VPInstruction::generate(VPTransformState &State) {
return Builder.CreateVectorSplat(
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
}
+ case VPInstruction::ReductionStartVector: {
+ if (State.VF.isScalar())
+ return State.get(getOperand(0), true);
+ IRBuilderBase::FastMathFlagGuard FMFG(Builder);
+ Builder.setFastMathFlags(getFastMathFlags());
+ // If this start vector is scaled then it should produce a vector with fewer
+ // elements than the VF.
+ ElementCount VF = State.VF.divideCoefficientBy(
+ cast<ConstantInt>(getOperand(2)->getLiveInIRValue())->getZExtValue());
+ auto *Iden = Builder.CreateVectorSplat(VF, State.get(getOperand(1), true));
+ Constant *Zero = Builder.getInt32(0);
+ return Builder.CreateInsertElement(Iden, State.get(getOperand(0), true),
+ Zero);
+ }
case VPInstruction::ComputeFindLastIVResult: {
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
// and will be removed by breaking up the recipe further.
@@ -892,6 +906,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::PtrAdd:
case VPInstruction::WideIVStep:
case VPInstruction::StepVector:
+ case VPInstruction::ReductionStartVector:
return false;
default:
return true;
@@ -922,6 +937,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::BranchOnCount:
case VPInstruction::BranchOnCond:
+ case VPInstruction::ReductionStartVector:
return true;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
@@ -1023,6 +1039,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::FirstActiveLane:
O << "first-active-lane";
break;
+ case VPInstruction::ReductionStartVector:
+ O << "reduction-start-vector";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -1613,6 +1632,7 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
Opcode == Instruction::FCmp || Opcode == Instruction::Select ||
Opcode == VPInstruction::WideIVStep ||
+ Opcode == VPInstruction::ReductionStartVector ||
Opcode == VPInstruction::ComputeReductionResult;
case OperationType::NonNegOp:
return Opcode == Instruction::ZExt;
@@ -3843,17 +3863,19 @@ void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
void VPReductionPHIRecipe::execute(VPTransformState &State) {
- // If this phi is fed by a scaled reduction then it should output a
- // vector with fewer elements than the VF.
- ElementCount VF = State.VF.divideCoefficientBy(VFScaleFactor);
+ // Reductions do not have to start at zero. They can start with
+ // any loop invariant values.
+ VPValue *StartVPV = getStartValue();
// In order to support recurrences we need to be able to vectorize Phi nodes.
// Phi nodes have cycles, so we need to vectorize them in two stages. This is
// stage #1: We create a new vector PHI node with no incoming edges. We'll use
// this value when we vectorize all of the instructions that use the PHI.
- auto *ScalarTy = State.TypeAnalysis.inferScalarType(this);
+ BasicBlock *VectorPH =
+ State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
bool ScalarPHI = State.VF.isScalar() || IsInLoop;
- Type *VecTy = ScalarPHI ? ScalarTy : VectorType::get(ScalarTy, VF);
+ Value *StartV = State.get(StartVPV, ScalarPHI);
+ Type *VecTy = StartV->getType();
BasicBlock *HeaderBB = State.CFG.PrevBB;
assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
@@ -3862,49 +3884,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
Phi->insertBefore(HeaderBB->getFirstInsertionPt());
State.set(this, Phi, IsInLoop);
- BasicBlock *VectorPH =
- State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
- // Create start and identity vector values for the reduction in the preheader.
- // TODO: Introduce recipes in VPlan preheader to create initial values.
- IRBuilderBase::InsertPointGuard IPBuilder(State.Builder);
- State.Builder.SetInsertPoint(VectorPH->getTerminator());
-
- // Reductions do not have to start at zero. They can start with
- // any loop invariant values.
- VPValue *StartVPV = getStartValue();
- RecurKind RK = RdxDesc.getRecurrenceKind();
- if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
- RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
- RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
- // [I|F]FindLastIV will use a sentinel value to initialize the reduction
- // phi or the resume value from the main vector loop when vectorizing the
- // epilogue loop. In the exit block, ComputeReductionResult will generate
- // checks to verify if the reduction result is the sentinel value. If the
- // result is the sentinel value, it will be corrected back to the start
- // value.
- // TODO: The sentinel value is not always necessary. When the start value is
- // a constant, and smaller than the start value of the induction variable,
- // the start value can be directly used to initialize the reduction phi.
- Phi->addIncoming(State.get(StartVPV, ScalarPHI), VectorPH);
- return;
- }
-
- Value *Iden = getRecurrenceIdentity(RK, VecTy->getScalarType(),
- RdxDesc.getFastMathFlags());
- unsigned CurrentPart = getUnrollPart(*this);
- Value *StartV = StartVPV->getLiveInIRValue();
- if (!ScalarPHI) {
- if (CurrentPart == 0) {
- Iden = State.Builder.CreateVectorSplat(VF, Iden);
- Constant *Zero = State.Builder.getInt32(0);
- StartV = State.Builder.CreateInsertElement(Iden, StartV, Zero);
- } else {
- Iden = State.Builder.CreateVectorSplat(VF, Iden);
- }
- }
-
- Value *StartVal = (CurrentPart == 0) ? StartV : Iden;
- Phi->addIncoming(StartVal, VectorPH);
+ Phi->addIncoming(StartV, VectorPH);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 5c8849be3d23e..354dac84a0c83 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1151,6 +1151,22 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
R.setOperand(0, Y);
return;
}
+
+ auto *Plan = R.getParent()->getPlan();
+ if (!Plan->isUnrolled())
+ return;
+ /// Simplify redundant ReductionStartVector recipes after unrolling.
+ VPValue *StartV;
+ if (match(&R, m_VPInstruction<VPInstruction::ReductionStartVector>(
+ m_VPValue(StartV), m_VPValue(), m_VPValue()))) {
+ R.getVPSingleValue()->replaceUsesWithIf(
+ StartV, [&R](const VPUser &U, unsigned Idx) {
+ auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&U);
+ return PhiR && R.getVPSingleValue() == PhiR->getOperand(Idx) &&
+ PhiR->isInLoop();
+ });
+ return;
+ }
}
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index e1fb3d476c58d..3a48feb8c4ef2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -223,6 +223,20 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
Copy->addOperand(R);
Copy->addOperand(getConstantVPV(Part));
} else if (RdxPhi) {
+ // If the start value is a ReductionStartVector, use the identity value (second operand) for unrolled parts. If the scaling factor is > 1, create a new ReductionStartVector with the scale factor and both operands set to the identity value.
+ if (auto *VPI = dyn_cast<VPInstruction>(RdxPhi->getStartValue())) {
+ if (cast<ConstantInt>(VPI->getOperand(2)->getLiveInIRValue())
+ ->getZExtValue() == 1)
+ Copy->setOperand(0, VPI->getOperand(1));
+ else {
+ if (Part == 1) {
+ auto *C = VPI->clone();
+ C->setOperand(0, C->getOperand(1));
+ C->insertAfter(VPI);
+ addUniformForAllParts(C);
+ }
+ }
+ }
Copy->addOperand(getConstantVPV(Part));
} else {
assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll
index 0e5e785a94636..c3fc91c4574f1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll
@@ -161,8 +161,8 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IDX_NEG]], [[N_VEC5]]
-; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32>
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32>
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[IV]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll
index f1947dec2ea23..b4987127a513d 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll
@@ -153,10 +153,10 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) {
; CHECK-NEXT: [[N_MOD_VF24:%.*]] = urem i64 [[TMP2]], 2
; CHECK-NEXT: [[N_VEC25:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF24]]
; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC25]]
+; CHECK-NEXT: [[TMP57:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
-; CHECK-NEXT: [[TMP57:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX38:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT32:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
index 01a7ea4ffcd05..3f17c95f7ca95 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
@@ -277,9 +277,9 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
+; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; IF-EVL-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
; IF-EVL-OUTLOOP: vector.body:
; IF-EVL-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -581,8 +581,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
-; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
+; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
@@ -771,8 +771,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
-; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
+; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
index 7660d06a35d43..ae0c189bf8fcd 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
@@ -137,9 +137,9 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) {
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
+; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> splat (i32 1), i32 [[START:%.*]], i32 0
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
-; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> splat (i32 1), i32 [[START:%.*]], i32 0
; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -1220,9 +1220,9 @@ define float @fmul(ptr %a, i64 %n, float %start) {
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
+; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <8 x float> splat (float 1.000000e+00), float [[START:%.*]], i32 0
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
-; IF-EVL-NEXT: [[TMP9:%.*]] = insertelement <8 x float> splat (float 1.000000e+00), float [[START:%.*]], i32 0
; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 7c42c3d9cd52e..2c6fe4f5c808e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -1271,12 +1271,12 @@ define i32 @g(i64 %n) {
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP20]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_MOD_VF7:%.*]] = urem i32 [[TMP1]], 4
; CHECK-NEXT: [[N_VEC8:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF7]]
+; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT9]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_RESUME_VAL]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX9:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index 5837a49bf0efc..5c1f628bef6d5 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -313,9 +313,9 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 2
; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
; VEC-NEXT: [[IND_END:%.*]] = add i64 [[V_1]], [[N_VEC]]
+; VEC-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[V_2:%.*]], i32 0
; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[COND_2:%.*]], i64 0
; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
-; VEC-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[V_2:%.*]], i32 0
; VEC-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC: vector.body:
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 1517ec84e9e01..5a5b06de69552 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -3117,9 +3117,9 @@ define i32 @testoverflowcheck() {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; CHECK-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -3161,9 +3161,9 @@ define i32 @testoverflowcheck() {
; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 510
; IND-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; IND-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]]
+; IND-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; IND-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
; IND-NEXT: br label [[VECTOR_BODY:%.*]]
; IND: vector.body:
; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -3203,9 +3203,9 @@ define i32 @testoverflowcheck() {
; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 508
; UNROLL-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; UNROLL-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]]
+; UNROLL-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; UNROLL-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -3246,9 +3246,9 @@ define i32 @testoverflowcheck() {
; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]]
+; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -3293,9 +3293,9 @@ define i32 @testoverflowcheck() {
; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 504
; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]]
+; INTERLEAVE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> <i32 poison, i32 -1, i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
; INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; INTERLEAVE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> <i32 poison, i32 -1, i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
; INTERLEAVE: vector.body:
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index c22ecda266fde..bc67079d596e2 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -16,12 +16,13 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector fast ir<0.000000e+00>, ir<0.000000e+00>, ir<1>
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
-; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<[[RDX_START]]>, ir<%red.next>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
@@ -84,12 +85,13 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector fast ir<0.000000e+00>, ir<0.000000e+00>, ir<1>
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
-; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<[[RDX_START]]>, ir<%red.next>
; CHECK-NEXT: vp<[[IV:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[IV]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
@@ -153,12 +155,13 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector nnan ninf nsz ir<0.000000e+00>, ir<0.000000e+00>, ir<1>
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
-; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%sum.07> = phi ir<0.000000e+00>, ir<%muladd>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%sum.07> = phi vp<[[RDX_START]]>, ir<%muladd>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
@@ -278,12 +281,13 @@ define i64 @print_extended_reduction(ptr nocapture readonly %x, ptr nocapture re
; CHECK-NEXT: Live-in ir<%n> = original trip-count
; CHECK-EMPTY:
; CHECK: vector.ph:
+; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector fast ir<0>, ir<0>, ir<1>
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
-; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, ir<[[RDX_NEXT:%.+]]>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[ADDR:%.+]]> = vector-pointer ir<%arrayidx>
@@ -322,12 +326,13 @@ define i64 @print_mulacc(ptr nocapture readonly %x, ptr nocapture readonly %y, i
; CHECK-NEXT: Live-in ir<%n> = original trip-count
; CHECK-EMPTY:
; CHECK: vector.ph:
+; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector fast ir<0>, ir<0>, ir<1>
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
-; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, ir<[[RDX_NEXT:%.+]]>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[ADDR0:%.+]]> = vector-pointer ir<[[ARRAYIDX0]]>
@@ -371,12 +376,13 @@ define i64 @print_mulacc_extended(ptr nocapture readonly %x, ptr nocapture reado
; CHECK-NEXT: Live-in ir<%n> = original trip-count
; CHECK-EMPTY:
; CHECK: vector.ph:
+; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector fast ir<0>, ir<0>, ir<1>
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
-; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, ir<[[RDX_NEXT:%.+]]>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[ADDR0:%.+]]> = vector-pointer ir<[[ARRAYIDX0]]>
More information about the llvm-commits
mailing list