[llvm] [LV] Use frozen start value for FindLastIV if needed. (PR #132691)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 31 14:32:05 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/132691
>From 3e4268330f59a26ccb8eac864b39fadeb09a84b6 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 23 Mar 2025 10:41:11 +0000
Subject: [PATCH 1/9] [VPlan] Add ComputeFindLastIVResult opcode (NFC).
This moves the logic for computing the FindLastIV reduction result to
its own opcode. A follow-up patch will update the new opcode to also
take the start value, to fix
https://github.com/llvm/llvm-project/issues/126836.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 17 ++++++---
llvm/lib/Transforms/Vectorize/VPlan.h | 1 +
.../Transforms/Vectorize/VPlanAnalysis.cpp | 1 +
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 36 ++++++++++++++-----
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 2 ++
5 files changed, 44 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 92160a421e59c..1168211e3d87b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7612,7 +7612,8 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
BasicBlock *BypassBlock) {
auto *EpiRedResult = dyn_cast<VPInstruction>(R);
if (!EpiRedResult ||
- EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult)
+ (EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult &&
+ EpiRedResult->getOpcode() != VPInstruction::ComputeFindLastIVResult))
return;
auto *EpiRedHeaderPhi =
@@ -9817,8 +9818,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
Builder.createSelect(Cond, OrigExitingVPV, PhiR, {}, "", FMFs);
OrigExitingVPV->replaceUsesWithIf(NewExitingVPV, [](VPUser &U, unsigned) {
return isa<VPInstruction>(&U) &&
- cast<VPInstruction>(&U)->getOpcode() ==
- VPInstruction::ComputeReductionResult;
+ (cast<VPInstruction>(&U)->getOpcode() ==
+ VPInstruction::ComputeReductionResult ||
+ cast<VPInstruction>(&U)->getOpcode() ==
+ VPInstruction::ComputeFindLastIVResult);
});
if (CM.usePredicatedReductionSelect(
PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy))
@@ -9863,8 +9866,12 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// also modeled in VPlan.
VPBuilder::InsertPointGuard Guard(Builder);
Builder.setInsertPoint(MiddleVPBB, IP);
- auto *FinalReductionResult = Builder.createNaryOp(
- VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
+ auto *FinalReductionResult =
+ Builder.createNaryOp(RecurrenceDescriptor::isFindLastIVRecurrenceKind(
+ RdxDesc.getRecurrenceKind())
+ ? VPInstruction::ComputeFindLastIVResult
+ : VPInstruction::ComputeReductionResult,
+ {PhiR, NewExitingVPV}, ExitDL);
// Update all users outside the vector region.
OrigExitingVPV->replaceUsesWithIf(
FinalReductionResult, [FinalReductionResult](VPUser &User, unsigned) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 3059b87ae63c8..64e7f2bddb668 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -866,6 +866,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
BranchOnCount,
BranchOnCond,
Broadcast,
+ ComputeFindLastIVResult,
ComputeReductionResult,
// Takes the VPValue to extract from as first operand and the lane or part
// to extract as second operand, counting from the end starting with 1 for
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 38bec733dbf73..d404ce46fae4a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -66,6 +66,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
inferScalarType(R->getOperand(1)) &&
"different types inferred for different operands");
return IntegerType::get(Ctx, 1);
+ case VPInstruction::ComputeFindLastIVResult:
case VPInstruction::ComputeReductionResult: {
auto *PhiR = cast<VPReductionPHIRecipe>(R->getOperand(0));
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index c7190b3187d94..2f1182399ee4a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -614,6 +614,27 @@ Value *VPInstruction::generate(VPTransformState &State) {
return Builder.CreateVectorSplat(
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
}
+ case VPInstruction::ComputeFindLastIVResult: {
+ // The recipe's operands are the reduction phi, followed by one operand for
+ // each part of the reduction.
+ unsigned UF = getNumOperands() - 1;
+ Value *ReducedPartRdx = State.get(getOperand(1));
+ for (unsigned Part = 1; Part < UF; ++Part) {
+ ReducedPartRdx = createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx,
+ State.get(getOperand(1 + Part)));
+ }
+
+ // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
+ // and will be removed by breaking up the recipe further.
+ auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
+ // Get its reduction variable descriptor.
+ const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+ RecurKind RK = RdxDesc.getRecurrenceKind();
+
+ assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK));
+ assert(!PhiR->isInLoop());
+ return createFindLastIVReduction(Builder, ReducedPartRdx, RdxDesc);
+ }
case VPInstruction::ComputeReductionResult: {
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
// and will be removed by breaking up the recipe further.
@@ -623,6 +644,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
RecurKind RK = RdxDesc.getRecurrenceKind();
+ assert(!RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
+ "should be handled by ComputeFindLastIVResult");
Type *PhiTy = OrigPhi->getType();
// The recipe's operands are the reduction phi, followed by one operand for
@@ -658,9 +681,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
ReducedPartRdx = Builder.CreateBinOp(
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
- else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
- ReducedPartRdx =
- createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);
else
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
}
@@ -669,8 +689,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
// Create the reduction after the loop. Note that inloop reductions create
// the target reduction in the loop using a Reduction recipe.
if ((State.VF.isVector() ||
- RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
- RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) &&
+ RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
!PhiR->isInLoop()) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
@@ -681,9 +700,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
ReducedPartRdx =
createAnyOfReduction(Builder, ReducedPartRdx, RdxDesc, OrigPhi);
- else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
- ReducedPartRdx =
- createFindLastIVReduction(Builder, ReducedPartRdx, RdxDesc);
else
ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK);
@@ -829,6 +845,7 @@ bool VPInstruction::isVectorToScalar() const {
return getOpcode() == VPInstruction::ExtractFromEnd ||
getOpcode() == Instruction::ExtractElement ||
getOpcode() == VPInstruction::FirstActiveLane ||
+ getOpcode() == VPInstruction::ComputeFindLastIVResult ||
getOpcode() == VPInstruction::ComputeReductionResult ||
getOpcode() == VPInstruction::AnyOf;
}
@@ -1011,6 +1028,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractFromEnd:
O << "extract-from-end";
break;
+ case VPInstruction::ComputeFindLastIVResult:
+ O << "compute-find-last-iv-result";
+ break;
case VPInstruction::ComputeReductionResult:
O << "compute-reduction-result";
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index a36c2aeb3da5c..ad957f33ee699 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -348,6 +348,8 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
// the parts to compute the final reduction value.
VPValue *Op1;
if (match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
+ m_VPValue(), m_VPValue(Op1))) ||
+ match(&R, m_VPInstruction<VPInstruction::ComputeFindLastIVResult>(
m_VPValue(), m_VPValue(Op1)))) {
addUniformForAllParts(cast<VPInstruction>(&R));
for (unsigned Part = 1; Part != UF; ++Part)
>From c5511666e4a7591c2cce347adf94333ede7edf2b Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 23 Mar 2025 10:43:13 +0000
Subject: [PATCH 2/9] [VPlan] Manage FindLastIV start value in
ComputeFindLastIVResult (NFC).
Keep the start value as operand of ComputeFindLastIVResult. A follow-up
patch will use this to make sure the start value is frozen if needed.
---
llvm/include/llvm/Transforms/Utils/LoopUtils.h | 2 +-
llvm/lib/Transforms/Utils/LoopUtils.cpp | 4 ++--
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 17 +++++++++++------
llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp | 1 +
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 12 +++++++-----
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 2 +-
6 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 193f505fb03fe..416a0a70325d1 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -423,7 +423,7 @@ Value *createAnyOfReduction(IRBuilderBase &B, Value *Src,
/// Create a reduction of the given vector \p Src for a reduction of the
/// kind RecurKind::IFindLastIV or RecurKind::FFindLastIV. The reduction
/// operation is described by \p Desc.
-Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src,
+Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src, Value *Start,
const RecurrenceDescriptor &Desc);
/// Create an ordered reduction intrinsic using the given recurrence
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 2e7685254f512..f57d95e7722dc 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1233,11 +1233,11 @@ Value *llvm::createAnyOfReduction(IRBuilderBase &Builder, Value *Src,
}
Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
+ Value *Start,
const RecurrenceDescriptor &Desc) {
assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(
Desc.getRecurrenceKind()) &&
"Unexpected reduction kind");
- Value *StartVal = Desc.getRecurrenceStartValue();
Value *Sentinel = Desc.getSentinelValue();
Value *MaxRdx = Src->getType()->isVectorTy()
? Builder.CreateIntMaxReduce(Src, true)
@@ -1246,7 +1246,7 @@ Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
// reduction is sentinel value.
Value *Cmp =
Builder.CreateCmp(CmpInst::ICMP_NE, MaxRdx, Sentinel, "rdx.select.cmp");
- return Builder.CreateSelect(Cmp, MaxRdx, StartVal, "rdx.select");
+ return Builder.CreateSelect(Cmp, MaxRdx, Start, "rdx.select");
}
Value *llvm::getReductionIdentity(Intrinsic::ID RdxID, Type *Ty,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1168211e3d87b..b47b444e5cfbc 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9864,14 +9864,19 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// bc.merge.rdx phi nodes, hence it needs to be created unconditionally here
// even for in-loop reductions, until the reduction resume value handling is
// also modeled in VPlan.
+ VPInstruction *FinalReductionResult;
VPBuilder::InsertPointGuard Guard(Builder);
Builder.setInsertPoint(MiddleVPBB, IP);
- auto *FinalReductionResult =
- Builder.createNaryOp(RecurrenceDescriptor::isFindLastIVRecurrenceKind(
- RdxDesc.getRecurrenceKind())
- ? VPInstruction::ComputeFindLastIVResult
- : VPInstruction::ComputeReductionResult,
- {PhiR, NewExitingVPV}, ExitDL);
+ if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
+ RdxDesc.getRecurrenceKind())) {
+ VPValue *Start = PhiR->getStartValue();
+ FinalReductionResult =
+ Builder.createNaryOp(VPInstruction::ComputeFindLastIVResult,
+ {PhiR, Start, NewExitingVPV}, ExitDL);
+ } else {
+ FinalReductionResult = Builder.createNaryOp(
+ VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
+ }
// Update all users outside the vector region.
OrigExitingVPV->replaceUsesWithIf(
FinalReductionResult, [FinalReductionResult](VPUser &User, unsigned) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index d404ce46fae4a..24a166bd336d1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -51,6 +51,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
switch (Opcode) {
case Instruction::ExtractElement:
+ case Instruction::Freeze:
return inferScalarType(R->getOperand(0));
case Instruction::Select: {
Type *ResTy = inferScalarType(R->getOperand(1));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2f1182399ee4a..02ff3c5dff239 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -617,11 +617,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
case VPInstruction::ComputeFindLastIVResult: {
// The recipe's operands are the reduction phi, followed by one operand for
// each part of the reduction.
- unsigned UF = getNumOperands() - 1;
- Value *ReducedPartRdx = State.get(getOperand(1));
+ unsigned UF = getNumOperands() - 2;
+ Value *ReducedPartRdx = State.get(getOperand(2));
for (unsigned Part = 1; Part < UF; ++Part) {
ReducedPartRdx = createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx,
- State.get(getOperand(1 + Part)));
+ State.get(getOperand(2 + Part)));
}
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
@@ -633,7 +633,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK));
assert(!PhiR->isInLoop());
- return createFindLastIVReduction(Builder, ReducedPartRdx, RdxDesc);
+ return createFindLastIVReduction(Builder, ReducedPartRdx,
+ State.get(getOperand(1), true), RdxDesc);
}
case VPInstruction::ComputeReductionResult: {
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
@@ -950,6 +951,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
return true;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
+ case VPInstruction::ComputeFindLastIVResult:
+ return Op == getOperand(1);
};
llvm_unreachable("switch should return");
}
@@ -1591,7 +1594,6 @@ void VPWidenRecipe::execute(VPTransformState &State) {
}
case Instruction::Freeze: {
Value *Op = State.get(getOperand(0));
-
Value *Freeze = Builder.CreateFreeze(Op);
State.set(this, Freeze);
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index ad957f33ee699..a513a255344cc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -350,7 +350,7 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
if (match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
m_VPValue(), m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeFindLastIVResult>(
- m_VPValue(), m_VPValue(Op1)))) {
+ m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
addUniformForAllParts(cast<VPInstruction>(&R));
for (unsigned Part = 1; Part != UF; ++Part)
R.addOperand(getValueForPart(Op1, Part));
>From 45915375ba40657728e3f273ede0bc50a22ea5af Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 23 Mar 2025 11:08:52 +0000
Subject: [PATCH 3/9] Match
---
.../Transforms/Vectorize/VPlanPatternMatch.h | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 8c11d93734667..3a727866a2875 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -216,6 +216,16 @@ using BinaryVPInstruction_match =
BinaryRecipe_match<Op0_t, Op1_t, Opcode, /*Commutative*/ false,
VPInstruction>;
+template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode,
+ bool Commutative, typename... RecipeTys>
+using TernaryRecipe_match = Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>,
+ Opcode, Commutative, RecipeTys...>;
+
+template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode>
+using TernaryVPInstruction_match =
+ TernaryRecipe_match<Op0_t, Op1_t, Op2_t, Opcode, /*Commutative*/ false,
+ VPInstruction>;
+
template <typename Op0_t, typename Op1_t, unsigned Opcode,
bool Commutative = false>
using AllBinaryRecipe_match =
@@ -234,6 +244,13 @@ m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1) {
return BinaryVPInstruction_match<Op0_t, Op1_t, Opcode>(Op0, Op1);
}
+template <unsigned Opcode, typename Op0_t, typename Op1_t, typename Op2_t>
+inline TernaryVPInstruction_match<Op0_t, Op1_t, Op2_t, Opcode>
+m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
+ return TernaryVPInstruction_match<Op0_t, Op1_t, Op2_t, Opcode>(
+ {Op0, Op1, Op2});
+}
+
template <typename Op0_t>
inline UnaryVPInstruction_match<Op0_t, VPInstruction::Not>
m_Not(const Op0_t &Op0) {
>From ff11744241cb333f81213ac6b2dbf5160871b506 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 22 Mar 2025 14:44:23 +0000
Subject: [PATCH 4/9] [LV] Use frozen start value for FindLastIV if needed.
FindLastIV introduces multiple uses of the start value, where in the
original source there was only a single use.
Each use of undef may produce a different result, so introducing
multiple uses can produce incorrect results when the input is
undef/poison.
If the start value may be undef or poison, freeze it and use the frozen
value, which will be the same at all uses.
Alive2 showing the issue: https://alive2.llvm.org/ce/z/mB4Jtb
---
.../Transforms/Vectorize/LoopVectorize.cpp | 82 +++++++++++++------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 8 ++
.../LoopVectorize/iv-select-cmp-no-wrap.ll | 10 ++-
.../Transforms/LoopVectorize/iv-select-cmp.ll | 72 +++++++++-------
4 files changed, 113 insertions(+), 59 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b47b444e5cfbc..d5949e3d52918 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7634,14 +7634,17 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
using namespace llvm::PatternMatch;
- Value *Cmp, *OrigResumeV;
+ Value *Cmp, *OrigResumeV, *CmpOp;
bool IsExpectedPattern =
match(MainResumeValue, m_Select(m_OneUse(m_Value(Cmp)),
m_Specific(RdxDesc.getSentinelValue()),
m_Value(OrigResumeV))) &&
- match(Cmp,
- m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(OrigResumeV),
- m_Specific(RdxDesc.getRecurrenceStartValue())));
+ (match(Cmp, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(OrigResumeV),
+ m_Value(CmpOp))) &&
+ (match(CmpOp,
+ m_Freeze(m_Specific(RdxDesc.getRecurrenceStartValue()))) ||
+ (CmpOp == RdxDesc.getRecurrenceStartValue() &&
+ isGuaranteedNotToBeUndefOrPoison(CmpOp))));
assert(IsExpectedPattern && "Unexpected reduction resume pattern");
(void)IsExpectedPattern;
MainResumeValue = OrigResumeV;
@@ -9866,14 +9869,31 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// also modeled in VPlan.
VPInstruction *FinalReductionResult;
VPBuilder::InsertPointGuard Guard(Builder);
- Builder.setInsertPoint(MiddleVPBB, IP);
if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
VPValue *Start = PhiR->getStartValue();
+ if (!isGuaranteedNotToBeUndefOrPoison(
+ PhiR->getStartValue()->getLiveInIRValue())) {
+ Builder.setInsertPoint(cast<VPBasicBlock>(Plan->getEntry()));
+ Start = Builder.createNaryOp(Instruction::Freeze, {Start}, {}, "fr");
+ }
+ Builder.setInsertPoint(MiddleVPBB, IP);
FinalReductionResult =
Builder.createNaryOp(VPInstruction::ComputeFindLastIVResult,
{PhiR, Start, NewExitingVPV}, ExitDL);
+ // Update all users outside the vector region.
+ for (VPUser *U : to_vector(OrigExitingVPV->users())) {
+ auto *R = cast<VPRecipeBase>(U);
+ if (R->getParent() && R->getParent()->getParent())
+ continue;
+
+ for (unsigned Idx = 0; Idx != R->getNumOperands(); ++Idx) {
+ if (R->getOperand(Idx) == PhiR->getStartValue())
+ R->setOperand(Idx, Start);
+ }
+ }
} else {
+ Builder.setInsertPoint(MiddleVPBB, IP);
FinalReductionResult = Builder.createNaryOp(
VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
}
@@ -10365,24 +10385,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
Header->setName("vec.epilog.vector.body");
- // Re-use the trip count and steps expanded for the main loop, as
- // skeleton creation needs it as a value that dominates both the scalar
- // and vector epilogue loops
- // TODO: This is a workaround needed for epilogue vectorization and it
- // should be removed once induction resume value creation is done
- // directly in VPlan.
- for (auto &R : make_early_inc_range(*Plan.getEntry())) {
- auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
- if (!ExpandR)
- continue;
- auto *ExpandedVal =
- Plan.getOrAddLiveIn(ExpandedSCEVs.find(ExpandR->getSCEV())->second);
- ExpandR->replaceAllUsesWith(ExpandedVal);
- if (Plan.getTripCount() == ExpandR)
- Plan.resetTripCount(ExpandedVal);
- ExpandR->eraseFromParent();
- }
-
+ DenseMap<Value *, Value *> ToFrozen;
// Ensure that the start values for all header phi recipes are updated before
// vectorizing the epilogue loop.
for (VPRecipeBase &R : Header->phis()) {
@@ -10448,6 +10451,10 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
ResumeV =
Builder.CreateICmpNE(ResumeV, RdxDesc.getRecurrenceStartValue());
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
+ ToFrozen[RdxDesc.getRecurrenceStartValue()] =
+ cast<PHINode>(ResumeV)->getIncomingValueForBlock(
+ EPI.MainLoopIterationCountCheck);
+
// VPReductionPHIRecipe for FindLastIV reductions requires an adjustment
// to the resume value. The resume value is adjusted to the sentinel
// value when the final value from the main vector loop equals the start
@@ -10456,8 +10463,8 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
// variable.
BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent();
IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt());
- Value *Cmp =
- Builder.CreateICmpEQ(ResumeV, RdxDesc.getRecurrenceStartValue());
+ Value *Cmp = Builder.CreateICmpEQ(
+ ResumeV, ToFrozen[RdxDesc.getRecurrenceStartValue()]);
ResumeV =
Builder.CreateSelect(Cmp, RdxDesc.getSentinelValue(), ResumeV);
}
@@ -10473,6 +10480,31 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
VPValue *StartVal = Plan.getOrAddLiveIn(ResumeV);
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal);
}
+
+ // Re-use the trip count and steps expanded for the main loop, as
+ // skeleton creation needs it as a value that dominates both the scalar
+ // and vector epilogue loops
+ // TODO: This is a workaround needed for epilogue vectorization and it
+ // should be removed once induction resume value creation is done
+ // directly in VPlan.
+ for (auto &R : make_early_inc_range(*Plan.getEntry())) {
+ auto *VPI = dyn_cast<VPInstruction>(&R);
+ if (VPI) {
+ VPI->replaceAllUsesWith(Plan.getOrAddLiveIn(
+ ToFrozen[VPI->getOperand(0)->getLiveInIRValue()]));
+ continue;
+ }
+
+ auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
+ if (!ExpandR)
+ continue;
+ auto *ExpandedVal =
+ Plan.getOrAddLiveIn(ExpandedSCEVs.find(ExpandR->getSCEV())->second);
+ ExpandR->replaceAllUsesWith(ExpandedVal);
+ if (Plan.getTripCount() == ExpandR)
+ Plan.resetTripCount(ExpandedVal);
+ ExpandR->eraseFromParent();
+ }
}
// Generate bypass values from the additional bypass block. Note that when the
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 02ff3c5dff239..f78ced71e1260 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -423,6 +423,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
if (isSingleScalar() || isVectorToScalar())
return true;
switch (Opcode) {
+ case Instruction::Freeze:
case Instruction::ICmp:
case Instruction::PHI:
case Instruction::Select:
@@ -474,6 +475,12 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *Idx = State.get(getOperand(1), /*IsScalar=*/true);
return Builder.CreateExtractElement(Vec, Idx, Name);
}
+ case Instruction::Freeze: {
+ if (State.hasVectorValue(this))
+ return State.get(this);
+ Value *Op = State.get(getOperand(0), true);
+ return Builder.CreateFreeze(Op, Name);
+ }
case Instruction::ICmp: {
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
@@ -939,6 +946,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case Instruction::ICmp:
case Instruction::Select:
case Instruction::Or:
+ case Instruction::Freeze:
// TODO: Cover additional opcodes.
return vputils::onlyFirstLaneUsed(this);
case VPInstruction::ActiveLaneMask:
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll
index 98dc0558489ad..9e0be4cbb2577 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll
@@ -6,6 +6,7 @@ define i64 @select_icmp_nuw_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) {
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT: [[FR:%.*]] = freeze i64 [[II]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -31,12 +32,12 @@ define i64 @select_icmp_nuw_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) {
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]])
; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808
-; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[II]]
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[FR]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[II]], %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -79,6 +80,7 @@ define i64 @select_icmp_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) {
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT: [[FR:%.*]] = freeze i64 [[II]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -104,12 +106,12 @@ define i64 @select_icmp_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) {
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]])
; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808
-; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[II]]
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[FR]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[II]], %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll
index dc3f2a1773cf6..edb381d5c3d4a 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll
@@ -404,6 +404,7 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 %
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -426,12 +427,12 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 %
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP4]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], -9223372036854775808
-; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 [[RDX_START]]
+; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 [[FR]]
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -451,6 +452,7 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 %
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
+; CHECK-VF4IC4-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
@@ -494,12 +496,12 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 %
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX8:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX7]], <4 x i64> [[TMP13]])
; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX8]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808
-; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 [[RDX_START]]
+; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 [[FR]]
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -519,6 +521,7 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 %
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-VF1IC4-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -558,12 +561,12 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 %
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP18]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX4]], i64 [[TMP19]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -9223372036854775808
-; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[RDX_START]]
+; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[FR]]
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -998,6 +1001,7 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -1023,12 +1027,12 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808
-; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[RDX_START]]
+; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[FR]]
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -1050,6 +1054,7 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
+; CHECK-VF4IC4-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
@@ -1102,12 +1107,12 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX12:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX11]], <4 x i64> [[TMP18]])
; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX12]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP20]], -9223372036854775808
-; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[RDX_START]]
+; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[FR]]
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -1129,6 +1134,7 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-VF1IC4-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -1176,12 +1182,12 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP26]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX4]], i64 [[TMP27]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -9223372036854775808
-; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[RDX_START]]
+; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[FR]]
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -1224,6 +1230,7 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -1249,12 +1256,12 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808
-; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[RDX_START]]
+; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[FR]]
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -1276,6 +1283,7 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
+; CHECK-VF4IC4-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
@@ -1328,12 +1336,12 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX12:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX11]], <4 x i64> [[TMP18]])
; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX12]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP20]], -9223372036854775808
-; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[RDX_START]]
+; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[FR]]
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -1355,6 +1363,7 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-VF1IC4-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -1402,12 +1411,12 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP26]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX4]], i64 [[TMP27]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -9223372036854775808
-; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[RDX_START]]
+; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[FR]]
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -1450,6 +1459,7 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -1476,17 +1486,17 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808
-; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[RDX_START]]
+; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[FR]]
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775807, %[[ENTRY]] ]
-; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
-; CHECK-VF4IC1-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ]
+; CHECK-VF4IC1-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]]
; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
@@ -1506,6 +1516,7 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
+; CHECK-VF4IC4-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
@@ -1559,17 +1570,17 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX13:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX12]], <4 x i64> [[TMP18]])
; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX13]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP20]], -9223372036854775808
-; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[RDX_START]]
+; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[FR]]
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775807, %[[ENTRY]] ]
-; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
-; CHECK-VF4IC4-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ]
+; CHECK-VF4IC4-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL13]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]]
; CHECK-VF4IC4-NEXT: [[TMP21:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
@@ -1589,6 +1600,7 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-VF1IC4-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -1642,17 +1654,17 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP30]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX6:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX5]], i64 [[TMP31]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX6]], -9223372036854775808
-; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX6]], i64 [[RDX_START]]
+; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX6]], i64 [[FR]]
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775807, %[[ENTRY]] ]
-; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
-; CHECK-VF1IC4-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ]
+; CHECK-VF1IC4-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL6]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]]
; CHECK-VF1IC4-NEXT: [[TMP33:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
>From 691d8c342b2a30aaafe3f0dfa4ca8be055906439 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 31 Mar 2025 14:07:44 +0100
Subject: [PATCH 5/9] !fixup address latest comments, thanks
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 8 ++------
llvm/lib/Transforms/Vectorize/VPlan.cpp | 8 ++++++++
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 5 ++---
llvm/lib/Transforms/Vectorize/VPlanValue.h | 3 +++
llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll | 6 ++++++
.../Transforms/LoopVectorize/vplan-printing-reductions.ll | 8 ++++++--
6 files changed, 27 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 53993f2caaa32..97f42345f67c3 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9910,13 +9910,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// Update all users outside the vector region.
for (VPUser *U : to_vector(OrigExitingVPV->users())) {
auto *R = cast<VPRecipeBase>(U);
- if (R->getParent() && R->getParent()->getParent())
+ if (R->getParent()->getParent() == VectorLoopRegion )
continue;
-
- for (unsigned Idx = 0; Idx != R->getNumOperands(); ++Idx) {
- if (R->getOperand(Idx) == PhiR->getStartValue())
- R->setOperand(Idx, Start);
- }
+ R->replaceUsesOfWith(PhiR->getStartValue(), Start);
}
} else {
Builder.setInsertPoint(MiddleVPBB, IP);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 1e2f70e5c103e..aeeceab01018a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1372,6 +1372,7 @@ static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
bool VPValue::isDefinedOutsideLoopRegions() const {
return !isDefinedInsideLoopRegions(this);
}
+
void VPValue::replaceAllUsesWith(VPValue *New) {
replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
}
@@ -1403,6 +1404,13 @@ void VPValue::replaceUsesWithIf(
}
}
+void VPUser::replaceUsesOfWith(VPValue *From, VPValue *To) {
+ for (unsigned Idx = 0; Idx != getNumOperands(); ++Idx) {
+ if (getOperand(Idx) == From)
+ setOperand(Idx, To);
+ }
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
OS << Tracker.getOrCreateName(this);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 04a3ba1018e36..9ccef5d03d358 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -476,9 +476,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
return Builder.CreateExtractElement(Vec, Idx, Name);
}
case Instruction::Freeze: {
- if (State.hasVectorValue(this))
- return State.get(this);
- Value *Op = State.get(getOperand(0), true);
+ Value *Op = State.get(getOperand(0), vputils::onlyFirstLaneUsed(this));
return Builder.CreateFreeze(Op, Name);
}
case Instruction::ICmp: {
@@ -916,6 +914,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
return false;
switch (getOpcode()) {
case Instruction::ExtractElement:
+ case Instruction::Freeze:
case Instruction::ICmp:
case Instruction::Select:
case VPInstruction::AnyOf:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 2b762d0533d19..d322fdfa727e4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -246,6 +246,9 @@ class VPUser {
New->addUser(*this);
}
+ /// Replaces all uses of \p From in the VPUser with \p To.
+ void replaceUsesOfWith(VPValue *From, VPValue *To);
+
typedef SmallVectorImpl<VPValue *>::iterator operand_iterator;
typedef SmallVectorImpl<VPValue *>::const_iterator const_operand_iterator;
typedef iterator_range<operand_iterator> operand_range;
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll
index 62b073fd2dd47..7f8d347836d47 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll
@@ -1469,6 +1469,8 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775807, %[[ENTRY]] ]
+; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -1550,6 +1552,8 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775807, %[[ENTRY]] ]
+; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -1630,6 +1634,8 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775807, %[[ENTRY]] ]
+; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index d0d276ab967fa..8880f90fcdf42 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -219,6 +219,10 @@ exit:
define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) {
; CHECK-LABEL: Checking a loop in 'find_last_iv'
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK: ir-bb<entry>:
+; CHECK-NEXT: EMIT vp<%fr> = freeze ir<%start>
+; CHECK-NEXT: Successor(s): vector.ph
+
; CHECK: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
@@ -237,7 +241,7 @@ define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RDX_RES:%.+]]> = compute-find-last-iv-result ir<%rdx>, ir<%start>, ir<%cond>
+; CHECK-NEXT: EMIT vp<[[RDX_RES:%.+]]> = compute-find-last-iv-result ir<%rdx>, vp<%fr>, ir<%cond>
; CHECK-NEXT: EMIT vp<[[EXT:%.+]]> = extract-from-end vp<[[RDX_RES]]>, ir<1>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%n>, vp<{{.+}}>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
@@ -245,7 +249,7 @@ define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) {
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT vp<%bc.resume.val> = resume-phi vp<{{.+}}>, ir<0>
-; CHECK-NEXT: EMIT vp<%bc.merge.rdx> = resume-phi vp<[[RDX_RES]]>, ir<%start>
+; CHECK-NEXT: EMIT vp<%bc.merge.rdx> = resume-phi vp<[[RDX_RES]]>, vp<%fr>
;
; CHECK: ir-bb<exit>:
; CHECK-NEXT: IR %cond.lcssa = phi i64 [ %cond, %loop ] (extra operand: vp<[[EXT]]> from middle.block)
>From ab70b607762c660e005703269e4d02fd85a023e9 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 31 Mar 2025 19:21:55 +0100
Subject: [PATCH 6/9] !fixup limit to epilogue vectorization.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 32 ++++++++--
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 1 +
.../LoopVectorize/iv-select-cmp-no-wrap.ll | 10 ++--
.../Transforms/LoopVectorize/iv-select-cmp.ll | 60 ++++++++-----------
4 files changed, 56 insertions(+), 47 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 97f42345f67c3..d0cc7846054bd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9898,11 +9898,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
VPValue *Start = PhiR->getStartValue();
- if (!isGuaranteedNotToBeUndefOrPoison(
- PhiR->getStartValue()->getLiveInIRValue())) {
- Builder.setInsertPoint(cast<VPBasicBlock>(Plan->getEntry()));
- Start = Builder.createNaryOp(Instruction::Freeze, {Start}, {}, "fr");
- }
Builder.setInsertPoint(MiddleVPBB, IP);
FinalReductionResult =
Builder.createNaryOp(VPInstruction::ComputeFindLastIVResult,
@@ -10361,7 +10356,34 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
continue;
EpiWidenedPhis.insert(
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
+
+ if (auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R)) {
+ if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
+ PhiR->getRecurrenceDescriptor().getRecurrenceKind())) {
+ if (!isGuaranteedNotToBeUndefOrPoison(
+ PhiR->getStartValue()->getLiveInIRValue())) {
+ VPBuilder Builder(MainPlan.getEntry());
+ PhiR->getStartValue()->replaceAllUsesWith(Builder.createNaryOp(
+ Instruction::Freeze, {PhiR->getStartValue()}, {}, "fr"));
+ }
+ }
+ }
}
+ for (VPRecipeBase &R :
+ MainPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
+ if (auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R)) {
+ if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
+ PhiR->getRecurrenceDescriptor().getRecurrenceKind())) {
+ if (!isGuaranteedNotToBeUndefOrPoison(
+ PhiR->getStartValue()->getLiveInIRValue())) {
+ VPBuilder Builder(MainPlan.getEntry());
+ PhiR->getStartValue()->replaceAllUsesWith(Builder.createNaryOp(
+ Instruction::Freeze, {PhiR->getStartValue()}, {}, "fr"));
+ }
+ }
+ }
+ }
+
for (VPRecipeBase &R : make_early_inc_range(*MainPlan.getScalarHeader())) {
auto *VPIRInst = dyn_cast<VPIRPhi>(&R);
if (!VPIRInst)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 9ccef5d03d358..78e78552df41b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1622,6 +1622,7 @@ void VPWidenRecipe::execute(VPTransformState &State) {
}
case Instruction::Freeze: {
Value *Op = State.get(getOperand(0));
+
Value *Freeze = Builder.CreateFreeze(Op);
State.set(this, Freeze);
break;
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll
index a6e5d1a948aa5..eefb32785c3f5 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll
@@ -6,7 +6,6 @@ define i64 @select_icmp_nuw_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) {
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
-; CHECK-NEXT: [[FR:%.*]] = freeze i64 [[II]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -31,12 +30,12 @@ define i64 @select_icmp_nuw_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) {
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]])
; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808
-; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[FR]]
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[II]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[II]], %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -79,7 +78,6 @@ define i64 @select_icmp_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) {
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[II:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
-; CHECK-NEXT: [[FR:%.*]] = freeze i64 [[II]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -104,12 +102,12 @@ define i64 @select_icmp_nsw(ptr %a, ptr %b, i64 %ii, i64 %n) {
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]])
; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808
-; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[FR]]
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[II]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[II]], %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll
index 7f8d347836d47..1521f9502879c 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll
@@ -398,7 +398,6 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 %
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
-; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -420,12 +419,12 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 %
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP4]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], -9223372036854775808
-; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 [[FR]]
+; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
+; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -445,7 +444,6 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 %
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
-; CHECK-VF4IC4-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
@@ -488,12 +486,12 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 %
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX8:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX7]], <4 x i64> [[TMP13]])
; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX8]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808
-; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 [[FR]]
+; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
+; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -513,7 +511,6 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 %
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
-; CHECK-VF1IC4-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -552,12 +549,12 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 %
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP18]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX4]], i64 [[TMP19]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -9223372036854775808
-; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[FR]]
+; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
+; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -986,7 +983,6 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
-; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -1011,12 +1007,12 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808
-; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[FR]]
+; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
+; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -1038,7 +1034,6 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
-; CHECK-VF4IC4-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
@@ -1090,12 +1085,12 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX12:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX11]], <4 x i64> [[TMP18]])
; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX12]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP20]], -9223372036854775808
-; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[FR]]
+; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
+; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -1117,7 +1112,6 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
-; CHECK-VF1IC4-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -1164,12 +1158,12 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP26]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX4]], i64 [[TMP27]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -9223372036854775808
-; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[FR]]
+; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
+; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -1212,7 +1206,6 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
-; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -1237,12 +1230,12 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808
-; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[FR]]
+; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
+; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -1264,7 +1257,6 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
-; CHECK-VF4IC4-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
@@ -1316,12 +1308,12 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX12:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX11]], <4 x i64> [[TMP18]])
; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX12]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP20]], -9223372036854775808
-; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[FR]]
+; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
+; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -1343,7 +1335,6 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
-; CHECK-VF1IC4-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -1390,12 +1381,12 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP26]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX4]], i64 [[TMP27]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -9223372036854775808
-; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[FR]]
+; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
+; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -1438,7 +1429,6 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
-; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -1464,13 +1454,13 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808
-; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[FR]]
+; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775807, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
+; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -1494,7 +1484,6 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
-; CHECK-VF4IC4-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
@@ -1547,13 +1536,13 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX13:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX12]], <4 x i64> [[TMP18]])
; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX13]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP20]], -9223372036854775808
-; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[FR]]
+; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775807, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
+; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -1577,7 +1566,6 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
-; CHECK-VF1IC4-NEXT: [[FR:%.*]] = freeze i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
@@ -1629,13 +1617,13 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP30]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX6:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX5]], i64 [[TMP31]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX6]], -9223372036854775808
-; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX6]], i64 [[FR]]
+; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX6]], i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775807, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[FR]], %[[ENTRY]] ]
+; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
>From 4e2b58dbc87016a7bd6c6f18469bfc65107fca5a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 31 Mar 2025 20:31:21 +0100
Subject: [PATCH 7/9] !fixup limit to epilogue
---
.../Transforms/Vectorize/LoopVectorize.cpp | 49 ++++++++++---------
1 file changed, 25 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d0cc7846054bd..c769703013306 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -10357,33 +10357,34 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
EpiWidenedPhis.insert(
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
- if (auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R)) {
- if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
- PhiR->getRecurrenceDescriptor().getRecurrenceKind())) {
- if (!isGuaranteedNotToBeUndefOrPoison(
- PhiR->getStartValue()->getLiveInIRValue())) {
- VPBuilder Builder(MainPlan.getEntry());
- PhiR->getStartValue()->replaceAllUsesWith(Builder.createNaryOp(
- Instruction::Freeze, {PhiR->getStartValue()}, {}, "fr"));
- }
- }
- }
}
- for (VPRecipeBase &R :
- MainPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
- if (auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R)) {
- if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
- PhiR->getRecurrenceDescriptor().getRecurrenceKind())) {
- if (!isGuaranteedNotToBeUndefOrPoison(
- PhiR->getStartValue()->getLiveInIRValue())) {
- VPBuilder Builder(MainPlan.getEntry());
- PhiR->getStartValue()->replaceAllUsesWith(Builder.createNaryOp(
- Instruction::Freeze, {PhiR->getStartValue()}, {}, "fr"));
- }
- }
- }
+ for (VPRecipeBase &R : *MainPlan.getMiddleBlock()) {
+ auto *VPI = dyn_cast<VPInstruction>(&R);
+ if (!VPI || VPI->getOpcode() != VPInstruction::ComputeFindLastIVResult)
+ continue;
+ VPValue *OrigStart = VPI->getOperand(1);
+ if (isGuaranteedNotToBeUndefOrPoison(
+ OrigStart->getLiveInIRValue()))
+ continue;
+ VPBuilder Builder(MainPlan.getEntry());
+ VPInstruction *Freeze = Builder.createNaryOp(
+ Instruction::Freeze, {OrigStart }, {}, "fr");
+ OrigStart->replaceUsesWithIf(Freeze, [Freeze](VPUser &U, unsigned) { return Freeze != &U; });
+ }
+
+ for (VPRecipeBase &R : *EpiPlan.getMiddleBlock()) {
+ auto *VPI = dyn_cast<VPInstruction>(&R);
+ if (!VPI || VPI->getOpcode() != VPInstruction::ComputeFindLastIVResult)
+ continue;
+ if (isGuaranteedNotToBeUndefOrPoison(
+ VPI->getOperand(1)->getLiveInIRValue()))
+ continue;
+ VPBuilder Builder(EpiPlan.getEntry());
+ VPI->setOperand(1, Builder.createNaryOp(
+ Instruction::Freeze, {VPI->getOperand(1) }, {}, "fr"));
}
+
for (VPRecipeBase &R : make_early_inc_range(*MainPlan.getScalarHeader())) {
auto *VPIRInst = dyn_cast<VPIRPhi>(&R);
if (!VPIRInst)
>From 094607c8c598cf65dc6b2b5df4ea32757996681b Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 31 Mar 2025 21:44:51 +0100
Subject: [PATCH 8/9] !fixup update tests, unify code.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 58 ++++++++++---------
.../AArch64/epilog-iv-select-cmp.ll | 9 +--
.../LoopVectorize/epilog-iv-select-cmp.ll | 9 +--
.../vplan-printing-reductions.ll | 8 +--
4 files changed, 42 insertions(+), 42 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c769703013306..ce047ee9bba00 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -10356,35 +10356,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
continue;
EpiWidenedPhis.insert(
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
-
- }
- for (VPRecipeBase &R : *MainPlan.getMiddleBlock()) {
- auto *VPI = dyn_cast<VPInstruction>(&R);
- if (!VPI || VPI->getOpcode() != VPInstruction::ComputeFindLastIVResult)
- continue;
- VPValue *OrigStart = VPI->getOperand(1);
- if (isGuaranteedNotToBeUndefOrPoison(
- OrigStart->getLiveInIRValue()))
- continue;
- VPBuilder Builder(MainPlan.getEntry());
- VPInstruction *Freeze = Builder.createNaryOp(
- Instruction::Freeze, {OrigStart }, {}, "fr");
- OrigStart->replaceUsesWithIf(Freeze, [Freeze](VPUser &U, unsigned) { return Freeze != &U; });
- }
-
- for (VPRecipeBase &R : *EpiPlan.getMiddleBlock()) {
- auto *VPI = dyn_cast<VPInstruction>(&R);
- if (!VPI || VPI->getOpcode() != VPInstruction::ComputeFindLastIVResult)
- continue;
- if (isGuaranteedNotToBeUndefOrPoison(
- VPI->getOperand(1)->getLiveInIRValue()))
- continue;
- VPBuilder Builder(EpiPlan.getEntry());
- VPI->setOperand(1, Builder.createNaryOp(
- Instruction::Freeze, {VPI->getOperand(1) }, {}, "fr"));
}
-
-
for (VPRecipeBase &R : make_early_inc_range(*MainPlan.getScalarHeader())) {
auto *VPIRInst = dyn_cast<VPIRPhi>(&R);
if (!VPIRInst)
@@ -10417,6 +10389,36 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
VPInstruction::ResumePhi,
{VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
"vec.epilog.resume.val");
+
+ // When vectorizing the epilogue, FindLastIV reductions can introduce multiple
+ // uses of undef/poison. If the reduction start value is not guaranteed to be
+ // undef or poison, we need to freeze it and use the frozen start when
+ // computing the reduction result. We also need to use the frozen value in the
+ // resume phi generated by the main vector loop, as this is also used to
+ // compute the reduction result after the epilogue vector loop.
+ auto AddFreezeForFindLastIVReductions = [](VPlan &Plan,
+ bool UpdateResumePhis) {
+ for (VPRecipeBase &R : *Plan.getMiddleBlock()) {
+ auto *VPI = dyn_cast<VPInstruction>(&R);
+ if (!VPI || VPI->getOpcode() != VPInstruction::ComputeFindLastIVResult)
+ continue;
+ VPValue *OrigStart = VPI->getOperand(1);
+ if (isGuaranteedNotToBeUndefOrPoison(OrigStart->getLiveInIRValue()))
+ continue;
+ VPBuilder Builder(Plan.getEntry());
+ VPInstruction *Freeze =
+ Builder.createNaryOp(Instruction::Freeze, {OrigStart}, {}, "fr");
+ VPI->setOperand(1, Freeze);
+ if (UpdateResumePhis)
+ OrigStart->replaceUsesWithIf(Freeze, [Freeze](VPUser &U, unsigned) {
+ return Freeze != &U && isa<VPInstruction>(&U) &&
+ cast<VPInstruction>(&U)->getOpcode() ==
+ VPInstruction::ResumePhi;
+ });
+ }
+ };
+ AddFreezeForFindLastIVReductions(MainPlan, true);
+ AddFreezeForFindLastIVReductions(EpiPlan, false);
}
/// Prepare \p Plan for vectorizing the epilogue loop. That is, re-use expanded
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
index 25404964d8058..9abda6eb60f43 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
@@ -9,6 +9,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 8
+; CHECK-NEXT: [[FR:%.*]] = freeze i8 [[START]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP2]], 32
@@ -42,7 +43,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP10]], <16 x i8> [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> [[RDX_MINMAX]])
; CHECK-NEXT: [[RDX_SELECT_CMP12:%.*]] = icmp ne i8 [[TMP13]], -128
-; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP12]], i8 [[TMP13]], i8 [[START]]
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP12]], i8 [[TMP13]], i8 [[FR]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
@@ -53,8 +54,8 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK: [[VEC_EPILOG_PH]]:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP3]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i8 [[BC_MERGE_RDX]], [[START]]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[FR]], %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i8 [[BC_MERGE_RDX]], [[FR]]
; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i8 -128, i8 [[BC_MERGE_RDX]]
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i32 [[TMP2]], 8
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF4]]
@@ -82,7 +83,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP22:%.*]] = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> [[TMP20]])
; CHECK-NEXT: [[RDX_SELECT_CMP14:%.*]] = icmp ne i8 [[TMP22]], -128
-; CHECK-NEXT: [[RDX_SELECT15:%.*]] = select i1 [[RDX_SELECT_CMP14]], i8 [[TMP22]], i8 [[START]]
+; CHECK-NEXT: [[RDX_SELECT15:%.*]] = select i1 [[RDX_SELECT_CMP14]], i8 [[TMP22]], i8 [[FR]]
; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[CMP_N16]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll
index ee154ea5a169a..800b6f3f28b7d 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll
@@ -217,6 +217,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4
+; CHECK-NEXT: [[FR:%.*]] = freeze i8 [[START]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP2]], 4
@@ -243,7 +244,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP10:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[TMP8]])
; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i8 [[TMP10]], -128
-; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i8 [[TMP10]], i8 [[START]]
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i8 [[TMP10]], i8 [[FR]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
@@ -254,8 +255,8 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK: [[VEC_EPILOG_PH]]:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP3]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i8 [[BC_MERGE_RDX]], [[START]]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[FR]], %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i8 [[BC_MERGE_RDX]], [[FR]]
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i8 -128, i8 [[BC_MERGE_RDX]]
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i32 [[TMP2]], 4
; CHECK-NEXT: [[N_VEC3:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF2]]
@@ -283,7 +284,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP19:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[TMP17]])
; CHECK-NEXT: [[RDX_SELECT_CMP12:%.*]] = icmp ne i8 [[TMP19]], -128
-; CHECK-NEXT: [[RDX_SELECT13:%.*]] = select i1 [[RDX_SELECT_CMP12]], i8 [[TMP19]], i8 [[START]]
+; CHECK-NEXT: [[RDX_SELECT13:%.*]] = select i1 [[RDX_SELECT_CMP12]], i8 [[TMP19]], i8 [[FR]]
; CHECK-NEXT: [[CMP_N14:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC3]]
; CHECK-NEXT: br i1 [[CMP_N14]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index 8880f90fcdf42..d0d276ab967fa 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -219,10 +219,6 @@ exit:
define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) {
; CHECK-LABEL: Checking a loop in 'find_last_iv'
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
-; CHECK: ir-bb<entry>:
-; CHECK-NEXT: EMIT vp<%fr> = freeze ir<%start>
-; CHECK-NEXT: Successor(s): vector.ph
-
; CHECK: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
@@ -241,7 +237,7 @@ define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RDX_RES:%.+]]> = compute-find-last-iv-result ir<%rdx>, vp<%fr>, ir<%cond>
+; CHECK-NEXT: EMIT vp<[[RDX_RES:%.+]]> = compute-find-last-iv-result ir<%rdx>, ir<%start>, ir<%cond>
; CHECK-NEXT: EMIT vp<[[EXT:%.+]]> = extract-from-end vp<[[RDX_RES]]>, ir<1>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%n>, vp<{{.+}}>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
@@ -249,7 +245,7 @@ define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) {
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT vp<%bc.resume.val> = resume-phi vp<{{.+}}>, ir<0>
-; CHECK-NEXT: EMIT vp<%bc.merge.rdx> = resume-phi vp<[[RDX_RES]]>, vp<%fr>
+; CHECK-NEXT: EMIT vp<%bc.merge.rdx> = resume-phi vp<[[RDX_RES]]>, ir<%start>
;
; CHECK: ir-bb<exit>:
; CHECK-NEXT: IR %cond.lcssa = phi i64 [ %cond, %loop ] (extra operand: vp<[[EXT]]> from middle.block)
>From 6554b6460b581764b0fc0a47339b1fa6f088a051 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 31 Mar 2025 22:31:31 +0100
Subject: [PATCH 9/9] !fixup fix formatting.
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ce047ee9bba00..6aa6c9a2e2242 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9905,7 +9905,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// Update all users outside the vector region.
for (VPUser *U : to_vector(OrigExitingVPV->users())) {
auto *R = cast<VPRecipeBase>(U);
- if (R->getParent()->getParent() == VectorLoopRegion )
+ if (R->getParent()->getParent() == VectorLoopRegion)
continue;
R->replaceUsesOfWith(PhiR->getStartValue(), Start);
}
More information about the llvm-commits
mailing list